RSKernelExpand.cpp revision e32af52d4be2bb80783404d99fa338b1143dbc9a
1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/*
2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project
3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License");
5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License.
6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at
7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *     http://www.apache.org/licenses/LICENSE-2.0
9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software
11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS,
12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and
14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License.
15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */
16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
176e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines#include "bcc/Assert.h"
18e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSTransforms.h"
197a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib>
2133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross#include <functional>
22e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross#include <unordered_set>
237a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
24b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DerivedTypes.h>
25b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Function.h>
26b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Instructions.h>
27b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/IRBuilder.h>
2818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser#include <llvm/IR/MDBuilder.h>
29b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Module.h>
30c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h>
317ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines#include <llvm/Support/raw_ostream.h>
32b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DataLayout.h>
33cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser#include <llvm/IR/Function.h>
34b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Type.h>
35806075b3a54af826fea78490fb213d8a0784138eTobias Grosser#include <llvm/Transforms/Utils/BasicBlockUtils.h>
36c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang
37c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include "bcc/Config/Config.h"
38ef73a242762bcd8113b9b65ceccbe7d909b5acbcZonr Chang#include "bcc/Support/Log.h"
39db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
40d88177580db4ddedf680854c51db333c97eabc59Stephen Hines#include "bcinfo/MetadataExtractor.h"
41d88177580db4ddedf680854c51db333c97eabc59Stephen Hines
#ifndef __DISABLE_ASSERTS
// Parameter counts of the expanded kernel signatures. They are consumed only
// by bccAssert() checks, so they are compiled out together with the asserts.
constexpr int kNumExpandedForeachParams = 4;
constexpr int kNumExpandedReduceParams = 3;
constexpr int kNumExpandedReduceNewAccumulatorParams = 4;
#endif

// Name strings for the RenderScript TBAA root and TBAA node.
constexpr char kRenderScriptTBAARootName[] = "RenderScript Distinct TBAA";
constexpr char kRenderScriptTBAANodeName[] = "RenderScript TBAA";
51bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
527a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc;
537a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
54db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace {
557a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
// Compile-time switch, currently on.
// NOTE(review): presumably gates emission of RenderScript TBAA metadata --
// confirm against its uses later in the file.
constexpr bool gEnableRsTbaa = true;
579c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines
584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala/* RSKernelExpandPass - This pass operates on functions that are able
594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * to be called via rsForEach(), "foreach_<NAME>", or
604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * "reduce_<NAME>". We create an inner loop for the function to be
614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * invoked over the appropriate data cells of the input/output
624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * allocations (adjusting other relevant parameters as we go). We
634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * support doing this for any forEach or reduce style compute
644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * kernels. The new function name is the original function name
654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * followed by ".expand". Note that we still generate code for the
664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * original function.
677a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */
684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaclass RSKernelExpandPass : public llvm::ModulePass {
6933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grosspublic:
70db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  static char ID;
71db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
7233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossprivate:
73e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h
74e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
75e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  typedef std::unordered_set<llvm::Function *> FunctionSet;
76e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
// Field indices of the RsLaunchDimensions struct. The numbering follows the
// order in which buildTypes() appends the member types (x, y, z, lod, face,
// array); the trailing Count entry is the number of fields.
enum RsLaunchDimensionsField {
  RsLaunchDimensionsFieldX     = 0,
  RsLaunchDimensionsFieldY     = 1,
  RsLaunchDimensionsFieldZ     = 2,
  RsLaunchDimensionsFieldLod   = 3,
  RsLaunchDimensionsFieldFace  = 4,
  RsLaunchDimensionsFieldArray = 5,

  RsLaunchDimensionsFieldCount = 6
};
87e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
// Field indices of the RsExpandKernelDriverInfoPfx struct. The numbering
// follows the order in which buildTypes() appends the member types; the
// trailing Count entry is the number of fields.
enum RsExpandKernelDriverInfoPfxField {
  RsExpandKernelDriverInfoPfxFieldInPtr,
  RsExpandKernelDriverInfoPfxFieldInStride,
  RsExpandKernelDriverInfoPfxFieldInLen,
  RsExpandKernelDriverInfoPfxFieldOutPtr,
  RsExpandKernelDriverInfoPfxFieldOutStride,
  RsExpandKernelDriverInfoPfxFieldOutLen,
  RsExpandKernelDriverInfoPfxFieldDim,
  RsExpandKernelDriverInfoPfxFieldCurrent,
  RsExpandKernelDriverInfoPfxFieldUsr,
  RsExpandKernelDriverInfoPfxFieldUsrLen,
  // Original (misspelled) name of the usrLen index. Kept as an alias with the
  // same value so any existing reference keeps compiling.
  RsExpandKernelDriverInfoPfxFieldUsLenr = RsExpandKernelDriverInfoPfxFieldUsrLen,

  RsExpandKernelDriverInfoPfxFieldCount
};
10233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
103bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::Module *Module;
104bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::LLVMContext *Context;
105bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
106bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  /*
   * Pointers to LLVM type information for the function signatures
1084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala   * for expanded functions. These must be re-calculated for each module
1094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala   * the pass is run on.
110bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes   */
1114e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  llvm::FunctionType *ExpandedForEachType, *ExpandedReduceType;
112e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  llvm::Type *RsExpandKernelDriverInfoPfxTy;
113db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
11425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  uint32_t mExportForEachCount;
11525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  const char **mExportForEachNameList;
11625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  const uint32_t *mExportForEachSignatureList;
117cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines
1184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  uint32_t mExportReduceCount;
1194e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  const char **mExportReduceNameList;
1204e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
1212b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // Turns on optimization of allocation stride values.
1222b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  bool mEnableStepOpt;
1232b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
124bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  uint32_t getRootSignature(llvm::Function *Function) {
125db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    const llvm::NamedMDNode *ExportForEachMetadata =
126bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes        Module->getNamedMetadata("#rs_export_foreach");
127db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
128db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (!ExportForEachMetadata) {
129db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
130bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      for (llvm::Function::arg_iterator B = Function->arg_begin(),
131bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                                        E = Function->arg_end();
132db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           B != E;
133db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           ++B) {
134db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        RootArgTys.push_back(B->getType());
135db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
136db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
137db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // For pre-ICS bitcode, we may not have signature information. In that
138db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // case, we use the size of the RootArgTys to select the number of
139db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // arguments.
140db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      return (1 << RootArgTys.size()) - 1;
141db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
142db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1437ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    if (ExportForEachMetadata->getNumOperands() == 0) {
1447ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      return 0;
1457ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
1467ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1476e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
148db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
149cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // We only handle the case for legacy root() functions here, so this is
150cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // hard-coded to look at only the first such function.
151db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
152900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    if (SigNode != nullptr && SigNode->getNumOperands() == 1) {
1531bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines      llvm::Metadata *SigMD = SigNode->getOperand(0);
1541bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines      if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) {
1551bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines        llvm::StringRef SigString = SigS->getString();
156db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        uint32_t Signature = 0;
157db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        if (SigString.getAsInteger(10, Signature)) {
158db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
159db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          return 0;
160db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        }
161db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        return Signature;
162db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
163db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
164db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
165db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return 0;
166db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
167db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
168429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray  bool isStepOptSupported(llvm::Type *AllocType) {
169429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
170429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
171429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
172429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
173429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (mEnableStepOpt) {
174429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
175429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
176429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
177429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType == VoidPtrTy) {
178429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
179429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
180429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
181429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (!PT) {
182429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
183429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
184429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
185429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // remaining conditions are 64-bit only
186429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (VoidPtrTy->getPrimitiveSizeInBits() == 32) {
187429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return true;
188429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
189429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
190429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // coerce suggests an upconverted struct type, which we can't support
191429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) {
192429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
193429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
194429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
195429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported
196429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2);
197429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128);
198429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType == V2xi64Ty || AllocType == Int128Ty) {
199429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
200429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
201429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
202429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    return true;
203429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray  }
204429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
2052b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // Get the actual value we should use to step through an allocation.
2067b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  //
2077b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // Normally the value we use to step through an allocation is given to us by
2087b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // the driver. However, for certain primitive data types, we can derive an
2097b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // integer constant for the step value. We use this integer constant whenever
2107b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // possible to allow further compiler optimizations to take place.
2117b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  //
212b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines  // DL - Target Data size/layout information.
2132b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // T - Type of allocation (should be a pointer).
2142b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // OrigStep - Original step increment (root.expand() input from driver).
215bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
2162b04086acbef6520ae2c54a868b1271abf053122Stephen Hines                            llvm::Value *OrigStep) {
217b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines    bccAssert(DL);
218bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(AllocType);
2192b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    bccAssert(OrigStep);
220bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
221429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (isStepOptSupported(AllocType)) {
2222b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      llvm::Type *ET = PT->getElementType();
223b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      uint64_t ETSize = DL->getTypeAllocSize(ET);
224bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
2252b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return llvm::ConstantInt::get(Int32Ty, ETSize);
2262b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    } else {
2272b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return OrigStep;
2282b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    }
2292b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  }
2302b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
231097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes  /// Builds the types required by the pass for the given context.
232bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  void buildTypes(void) {
233e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs.
234bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
235e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8Ty                   = llvm::Type::getInt8Ty(*Context);
236e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8PtrTy                = Int8Ty->getPointerTo();
237e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT);
238e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32Ty                  = llvm::Type::getInt32Ty(*Context);
239e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32ArrayInputLimitTy   = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT);
240e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *VoidPtrTy                = llvm::Type::getInt8PtrTy(*Context);
241e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32Array4Ty            = llvm::ArrayType::get(Int32Ty, 4);
242097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
243097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes    /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h:
244db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *
245e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * struct RsLaunchDimensions {
246e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t x;
247db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t y;
248db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t z;
249e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t lod;
250e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t face;
251e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t array[4];
252e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * };
253e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     */
254e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes;
255e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t x
256e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t y
257e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t z
258e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t lod
259e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t face
260e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4]
261e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::StructType *RsLaunchDimensionsTy =
262e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions");
263e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
2641d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross    /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h:
265e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
266e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * struct RsExpandKernelDriverInfoPfx {
267e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT];
268e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t inStride[RS_KERNEL_INPUT_LIMIT];
269e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t inLen;
270e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
271e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT];
272e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t outStride[RS_KERNEL_INPUT_LIMIT];
273e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t outLen;
274e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
275e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // Dimension of the launch
276e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     RsLaunchDimensions dim;
277e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
278e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // The walking iterator of the launch
279e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     RsLaunchDimensions current;
280e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
281e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     const void *usr;
282e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t usrLen;
283e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
284e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // Items below this line are not used by the compiler and can be change in the driver.
285e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // So the compiler must assume there are an unknown number of fields of unknown type
286e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // beginning here.
287db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     * };
2881d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross     *
2891d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross     * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp).
290db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     */
291e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes;
292e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]
293e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t inStride[RS_KERNEL_INPUT_LIMIT]
294e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t inLen
295e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]
296e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t outStride[RS_KERNEL_INPUT_LIMIT]
297e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t outLen
298e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions dim
299e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions current
300e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy);                // const void *usr
301e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t usrLen
302e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    RsExpandKernelDriverInfoPfxTy =
303e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx");
304bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
305bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    // Create the function type for expanded kernels.
3064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
307bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
308e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo();
3094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // void (const RsExpandKernelDriverInfoPfxTy *p, uint32_t x1, uint32_t x2, uint32_t outstep)
3104e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    ExpandedForEachType = llvm::FunctionType::get(VoidTy,
3114e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        {RsExpandKernelDriverInfoPfxPtrTy, Int32Ty, Int32Ty, Int32Ty}, false);
312bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
3134e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // void (void *inBuf, void *outBuf, uint32_t len)
3144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    ExpandedReduceType = llvm::FunctionType::get(VoidTy, {VoidPtrTy, VoidPtrTy, Int32Ty}, false);
3158ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  }
3168ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
3174e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  /// @brief Create skeleton of the expanded foreach kernel.
318357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///
319357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  /// This creates a function with the following signature:
320357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///
321357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
3225010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes  ///         uint32_t outstep)
323357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///
3244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  llvm::Function *createEmptyExpandedForEachKernel(llvm::StringRef OldName) {
325bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
3264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      llvm::Function::Create(ExpandedForEachType,
327bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                             llvm::GlobalValue::ExternalLinkage,
328bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                             OldName + ".expand", Module);
3294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
330bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
331bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("p");
332bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("x1");
333bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("x2");
334bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("arg_outstep");
3354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
3364e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                                                       ExpandedFunction);
3374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::IRBuilder<> Builder(Begin);
3384e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    Builder.CreateRetVoid();
3394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    return ExpandedFunction;
3404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  }
3414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
3424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // Create skeleton of the expanded reduce kernel.
3434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
3444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // This creates a function with the following signature:
3454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
3464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   void @func.expand(i8* nocapture %inBuf, i8* nocapture %outBuf, i32 len)
3474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
3484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  llvm::Function *createEmptyExpandedReduceKernel(llvm::StringRef OldName) {
3494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Function *ExpandedFunction =
3504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      llvm::Function::Create(ExpandedReduceType,
3514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                             llvm::GlobalValue::ExternalLinkage,
3524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                             OldName + ".expand", Module);
3534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedReduceParams);
3544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
3554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
3564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
3574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    using llvm::Attribute;
3584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
3594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Argument *InBuf = &(*AI++);
3604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    InBuf->setName("inBuf");
361dfde70a8ae9b77bbf0e8d9d22a55e1d1fda7d64dStephen Hines    InBuf->addAttr(llvm::AttributeSet::get(*Context, InBuf->getArgNo() + 1, llvm::makeArrayRef(Attribute::NoCapture)));
3624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
3634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Argument *OutBuf = &(*AI++);
3644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    OutBuf->setName("outBuf");
365dfde70a8ae9b77bbf0e8d9d22a55e1d1fda7d64dStephen Hines    OutBuf->addAttr(llvm::AttributeSet::get(*Context, OutBuf->getArgNo() + 1, llvm::makeArrayRef(Attribute::NoCapture)));
3664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
3674e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    (AI++)->setName("len");
368bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
369bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
370bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                                                       ExpandedFunction);
371806075b3a54af826fea78490fb213d8a0784138eTobias Grosser    llvm::IRBuilder<> Builder(Begin);
372806075b3a54af826fea78490fb213d8a0784138eTobias Grosser    Builder.CreateRetVoid();
373806075b3a54af826fea78490fb213d8a0784138eTobias Grosser
374bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    return ExpandedFunction;
375357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  }
376357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser
377e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Create skeleton of a general reduce kernel's expanded accumulator.
378e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
379e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // This creates a function with the following signature:
380e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
381e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //  void @func.expand(%RsExpandKernelDriverInfoPfx* nocapture %p,
382e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                    i32 %x1, i32 %x2, accumType* nocapture %accum)
383e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
384e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  llvm::Function *createEmptyExpandedReduceNewAccumulator(llvm::StringRef OldName,
385e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                          llvm::Type *AccumArgTy) {
386e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
387e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
388e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::FunctionType *ExpandedReduceNewAccumulatorType =
389e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        llvm::FunctionType::get(VoidTy,
390e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                {RsExpandKernelDriverInfoPfxTy->getPointerTo(),
391e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 Int32Ty, Int32Ty, AccumArgTy}, false);
392e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function *FnExpandedAccumulator =
393e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::Function::Create(ExpandedReduceNewAccumulatorType,
394e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                             llvm::GlobalValue::ExternalLinkage,
395e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                             OldName + ".expand", Module);
396e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceNewAccumulatorParams);
397e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
398e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function::arg_iterator AI = FnExpandedAccumulator->arg_begin();
399e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
400e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    using llvm::Attribute;
401e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
402e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_p = &(*AI++);
403e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_p->setName("p");
404e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_p->addAttr(llvm::AttributeSet::get(*Context, Arg_p->getArgNo() + 1,
405e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                           llvm::makeArrayRef(Attribute::NoCapture)));
406e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
407e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_x1 = &(*AI++);
408e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_x1->setName("x1");
409e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
410e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_x2 = &(*AI++);
411e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_x2->setName("x2");
412e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
413e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_accum = &(*AI++);
414e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_accum->setName("accum");
415e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_accum->addAttr(llvm::AttributeSet::get(*Context, Arg_accum->getArgNo() + 1,
416e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                               llvm::makeArrayRef(Attribute::NoCapture)));
417e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
418e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
419e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                       FnExpandedAccumulator);
420e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::IRBuilder<> Builder(Begin);
421e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.CreateRetVoid();
422e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
423e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    return FnExpandedAccumulator;
424e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
425e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
426e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @brief Create an empty loop
427e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
428e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// Create a loop of the form:
429e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
430e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// for (i = LowerBound; i < UpperBound; i++)
431e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///   ;
432e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
433e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// After the loop has been created, the builder is set such that
434e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// instructions can be added to the loop body.
435e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
436e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param Builder The builder to use to build this loop. The current
437e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///                position of the builder is the position the loop
438e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///                will be inserted.
439e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param LowerBound The first value of the loop iterator
440e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param UpperBound The maximal value of the loop iterator
441e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param LoopIV A reference that will be set to the loop iterator.
442e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @return The BasicBlock that will be executed after the loop.
443e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
444e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::Value *LowerBound,
445e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::Value *UpperBound,
446e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::PHINode **LoopIV) {
447c2ca742d7d0197c52e49467862844463fb42280fDavid Gross    bccAssert(LowerBound->getType() == UpperBound->getType());
448e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
449e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
450e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    llvm::Value *Cond, *IVNext;
451e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    llvm::PHINode *IV;
452e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
453e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    CondBB = Builder.GetInsertBlock();
4541bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines    AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), nullptr, nullptr);
455bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
456e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
457e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // if (LowerBound < Upperbound)
458e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto LoopHeader
459e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // else
460e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto AfterBB
461e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    CondBB->getTerminator()->eraseFromParent();
462e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.SetInsertPoint(CondBB);
463e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser    Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
464e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
465e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
466e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
467e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // iv.next = iv + 1
468e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // if (iv.next < Upperbound)
469e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto LoopHeader
470e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // else
471e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto AfterBB
472e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.SetInsertPoint(HeaderBB);
473e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
474e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IV->addIncoming(LowerBound, CondBB);
475e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
476e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IV->addIncoming(IVNext, HeaderBB);
477e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser    Cond = Builder.CreateICmpULT(IVNext, UpperBound);
478e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
479e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    AfterBB->setName("Exit");
480e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
481e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    *LoopIV = IV;
482e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    return AfterBB;
483e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  }
484e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
48528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Finish building the outgoing argument list for calling a ForEach-able function.
48628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //
48728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // ArgVector - on input, the non-special arguments
48828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //             on output, the non-special arguments combined with the special arguments
48928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //               from SpecialArgVector
49028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // SpecialArgVector - special arguments (from ExpandSpecialArguments())
49128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // SpecialArgContextIdx - return value of ExpandSpecialArguments()
49228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //                          (position of context argument in SpecialArgVector)
49328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // CalleeFunction - the ForEach-able function being called
49428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Builder - for inserting code into the caller function
49528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  template<unsigned int ArgVectorLen, unsigned int SpecialArgVectorLen>
49628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  void finishArgList(      llvm::SmallVector<llvm::Value *, ArgVectorLen>        &ArgVector,
49728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const llvm::SmallVector<llvm::Value *, SpecialArgVectorLen> &SpecialArgVector,
49828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const int SpecialArgContextIdx,
49928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const llvm::Function &CalleeFunction,
50028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     llvm::IRBuilder<> &CallerBuilder) {
50128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    /* The context argument (if any) is a pointer to an opaque user-visible type that differs from
50228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * the RsExpandKernelDriverInfoPfx type used in the function we are generating (although the
50328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * two types represent the same thing).  Therefore, we must introduce a pointer cast when
50428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * generating a call to the kernel function.
50528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     */
50628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    const int ArgContextIdx =
50728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        SpecialArgContextIdx >= 0 ? (ArgVector.size() + SpecialArgContextIdx) : SpecialArgContextIdx;
50828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    ArgVector.append(SpecialArgVector.begin(), SpecialArgVector.end());
50928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    if (ArgContextIdx >= 0) {
51028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      llvm::Type *ContextArgType = nullptr;
51128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      int ArgIdx = ArgContextIdx;
51228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      for (const auto &Arg : CalleeFunction.getArgumentList()) {
51328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        if (!ArgIdx--) {
51428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross          ContextArgType = Arg.getType();
51528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross          break;
51628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        }
51728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      }
51828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      bccAssert(ContextArgType);
51928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      ArgVector[ArgContextIdx] = CallerBuilder.CreatePointerCast(ArgVector[ArgContextIdx], ContextArgType);
52028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    }
52128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  }
52228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
523083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // GEPHelper() returns a SmallVector of values suitable for passing
524083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // to IRBuilder::CreateGEP(), and SmallGEPIndices is a typedef for
525083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // the returned data type. It is sized so that the SmallVector
526083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // returned by GEPHelper() never needs to do a heap allocation for
527083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // any list of GEP indices it encounters in the code.
528083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  typedef llvm::SmallVector<llvm::Value *, 3> SmallGEPIndices;
529083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
530083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Helper for turning a list of constant integer GEP indices into a
531083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // SmallVector of llvm::Value*. The return value is suitable for
532083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // passing to a GetElementPtrInst constructor or IRBuilder::CreateGEP().
533083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //
534083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Inputs:
535083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   I32Args should be integers which represent the index arguments
536083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   to a GEP instruction.
537083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //
538083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Returns:
539083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   Returns a SmallVector of ConstantInts.
5404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  SmallGEPIndices GEPHelper(const std::initializer_list<int32_t> I32Args) {
541083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    SmallGEPIndices Out(I32Args.size());
542083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::IntegerType *I32Ty = llvm::Type::getInt32Ty(*Context);
543083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    std::transform(I32Args.begin(), I32Args.end(), Out.begin(),
544083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                   [I32Ty](int32_t Arg) { return llvm::ConstantInt::get(I32Ty, Arg); });
545083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    return Out;
546083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  }
547083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
5488ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosserpublic:
5494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  RSKernelExpandPass(bool pEnableStepOpt = true)
550900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes      : ModulePass(ID), Module(nullptr), Context(nullptr),
551bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes        mEnableStepOpt(pEnableStepOpt) {
552bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
5538ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  }
5548ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
555c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
556c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines    // This pass does not use any other analysis passes, but it does
557c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines    // add/wrap the existing functions in the module (thus altering the CFG).
558c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines  }
559c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines
56033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Build contribution to outgoing argument list for calling a
561e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // ForEach-able function or a general reduction accumulator
562e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // function, based on the special parameters of that function.
56333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  //
564e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Signature - metadata bits for the signature of the callee
56533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // X, Arg_p - values derived directly from expanded function,
566e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //            suitable for computing arguments for the callee
56733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // CalleeArgs - contribution is accumulated here
56833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Bump - invoked once for each contributed outgoing argument
569083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // LoopHeaderInsertionPoint - an Instruction in the loop header, before which
570083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //                            this function can insert loop-invariant loads
57128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //
57228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Return value is the (zero-based) position of the context (Arg_p)
57328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // argument in the CalleeArgs vector, or a negative value if the
57428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // context argument is not placed in the CalleeArgs vector.
57528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  int ExpandSpecialArguments(uint32_t Signature,
57628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::Value *X,
57728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::Value *Arg_p,
57828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::IRBuilder<> &Builder,
57928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::SmallVector<llvm::Value*, 8> &CalleeArgs,
580083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             std::function<void ()> Bump,
581083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             llvm::Instruction *LoopHeaderInsertionPoint) {
58228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
58328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    bccAssert(CalleeArgs.empty());
58428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
58528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    int Return = -1;
58633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) {
58733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      CalleeArgs.push_back(Arg_p);
58833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      Bump();
58928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      Return = CalleeArgs.size() - 1;
59033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
59133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
59233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
59333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      CalleeArgs.push_back(X);
59433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      Bump();
59533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
59633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
597e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) ||
598e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
599083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      bccAssert(LoopHeaderInsertionPoint);
60033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
601083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // Y and Z are loop invariant, so they can be hoisted out of the
602083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // loop. Set the IRBuilder insertion point to the loop header.
603083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      auto OldInsertionPoint = Builder.saveIP();
604083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.SetInsertPoint(LoopHeaderInsertionPoint);
605e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
606e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
607083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices YValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
608083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          RsLaunchDimensionsFieldY}));
609083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *YAddr = Builder.CreateInBoundsGEP(Arg_p, YValueGEP, "Y.gep");
610083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        CalleeArgs.push_back(Builder.CreateLoad(YAddr, "Y"));
611e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        Bump();
612e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      }
613e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
614e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
615083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices ZValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
616083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          RsLaunchDimensionsFieldZ}));
617083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *ZAddr = Builder.CreateInBoundsGEP(Arg_p, ZValueGEP, "Z.gep");
618083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        CalleeArgs.push_back(Builder.CreateLoad(ZAddr, "Z"));
619e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        Bump();
620e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      }
621083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
622083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.restoreIP(OldInsertionPoint);
62333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
62428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
62528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    return Return;
62633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  }
62733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
628e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Generate loop-invariant input processing setup code for an expanded
629e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // ForEach-able function or an expanded general reduction accumulator
630e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // function.
631e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
632e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // LoopHeader - block at the end of which the setup code will be inserted
633e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Arg_p - RSKernelDriverInfo pointer passed to the expanded function
634e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // TBAAPointer - metadata for marking loads of pointer values out of RSKernelDriverInfo
635e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // ArgIter - iterator pointing to first input of the UNexpanded function
636e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // NumInputs - number of inputs (NOT number of ARGUMENTS)
637e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
638e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // InBufPtrs[] - this function sets each array element to point to the first
639e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //               cell of the corresponding input allocation
640e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // InStructTempSlots[] - this function sets each array element either to nullptr
641e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       or to the result of an alloca (for the case where the
642e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       calling convention dictates that a value must be passed
643e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       by reference, and so we need a stacked temporary to hold
644e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       a copy of that value)
645e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  void ExpandInputsLoopInvariant(llvm::IRBuilder<> &Builder, llvm::BasicBlock *LoopHeader,
646e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::Value *Arg_p,
647e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::MDNode *TBAAPointer,
648e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::Function::arg_iterator ArgIter,
649e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 const size_t NumInputs,
650e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
651e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots) {
652e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT);
653e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
654e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Extract information about input slots. The work done
655e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // here is loop-invariant, so we can hoist the operations out of the loop.
656e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    auto OldInsertionPoint = Builder.saveIP();
657e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.SetInsertPoint(LoopHeader->getTerminator());
658e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
659e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    for (size_t InputIndex = 0; InputIndex < NumInputs; ++InputIndex, ArgIter++) {
660e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::Type *InType = ArgIter->getType();
661e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
662e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      /*
663e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * AArch64 calling conventions dictate that structs of sufficient size
664e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * get passed by pointer instead of passed by value.  This, combined
665e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * with the fact that we don't allow kernels to operate on pointer
666e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * data means that if we see a kernel with a pointer parameter we know
667e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * that it is a struct input that has been promoted.  As such we don't
668e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * need to convert its type to a pointer.  Later we will need to know
669e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * to create a temporary copy on the stack, so we save this information
670e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * in InStructTempSlots.
671e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       */
672e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) {
673e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        llvm::Type *ElementType = PtrType->getElementType();
674e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr,
675e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                         "input_struct_slot"));
676e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      } else {
677e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InType = InType->getPointerTo();
678e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InStructTempSlots.push_back(nullptr);
679e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
680e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
681e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      SmallGEPIndices InBufPtrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr,
682e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                             static_cast<int32_t>(InputIndex)}));
683e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::Value    *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep");
684e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf");
685e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::Value    *CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in");
686e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
687e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (gEnableRsTbaa) {
688e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InBufPtr->setMetadata("tbaa", TBAAPointer);
689e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
690e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
691e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      InBufPtrs.push_back(CastInBufPtr);
692e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
693e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
694e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.restoreIP(OldInsertionPoint);
695e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
696e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
697e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Generate loop-varying input processing code for an expanded ForEach-able function
698e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // or an expanded general reduction accumulator function.  Also, for the call to the
699e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // UNexpanded function, collect the portion of the argument list corresponding to the
700e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // inputs.
701e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
702e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Arg_x1 - first X coordinate to be processed by the expanded function
703e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // TBAAAllocation - metadata for marking loads of input values out of allocations
704e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // NumInputs -- number of inputs (NOT number of ARGUMENTS)
705e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // InBufPtrs[] - this function consumes the information produced by ExpandInputsLoopInvariant()
706e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // InStructTempSlots[] - this function consumes the information produced by ExpandInputsLoopInvariant()
707e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // IndVar - value of loop induction variable (X coordinate) for a given loop iteration
708e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
709e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // RootArgs - this function sets this to the list of outgoing argument values corresponding
710e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //            to the inputs
711e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  void ExpandInputsBody(llvm::IRBuilder<> &Builder,
712e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::Value *Arg_x1,
713e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::MDNode *TBAAAllocation,
714e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        const size_t NumInputs,
715e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        const llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
716e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        const llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots,
717e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::Value *IndVar,
718e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::SmallVectorImpl<llvm::Value *> &RootArgs) {
719e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Offset = Builder.CreateSub(IndVar, Arg_x1);
720e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
721e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    for (size_t Index = 0; Index < NumInputs; ++Index) {
722e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::Value *InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset);
723e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::Value *Input;
724e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
725e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
726e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
727e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (gEnableRsTbaa) {
728e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InputLoad->setMetadata("tbaa", TBAAAllocation);
729e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
730e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
731e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) {
732e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // Pass a pointer to a temporary on the stack, rather than
733e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // passing a pointer to the original value. We do not want
734e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // the kernel to potentially modify the input data.
735e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
736e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // Note: don't annotate with TBAA, since the kernel might
737e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // have its own TBAA annotations for the pointer argument.
738e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        Builder.CreateStore(InputLoad, TemporarySlot);
739e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        Input = TemporarySlot;
740e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      } else {
741e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        Input = InputLoad;
742e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
743e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
744e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      RootArgs.push_back(Input);
745e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
746e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
747e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
7488ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  /* Performs the actual optimization on a selected function. On success, the
7498ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   * Module will contain a new function of the name "<NAME>.expand" that
7508ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   * invokes <NAME>() in a loop with the appropriate parameters.
7518ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   */
7524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  bool ExpandOldStyleForEach(llvm::Function *Function, uint32_t Signature) {
753bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    ALOGV("Expanding ForEach-able Function %s",
754bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes          Function->getName().str().c_str());
7558ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
7568ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser    if (!Signature) {
757bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      Signature = getRootSignature(Function);
7588ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser      if (!Signature) {
7598ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        // We couldn't determine how to expand this function based on its
7608ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        // function signature.
7618ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        return false;
7628ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser      }
7638ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser    }
7648ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
765bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::DataLayout DL(Module);
7668ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
767bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
7684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      createEmptyExpandedForEachKernel(Function->getName());
769db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
770bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    /*
771bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * Extract the expanded function's parameters.  It is guaranteed by
772e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross     * createEmptyExpandedForEachKernel that there will be four parameters.
773bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     */
77433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
7754e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
77633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
777bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator ExpandedFunctionArgIter =
778bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      ExpandedFunction->arg_begin();
779db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
780bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
781bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
782bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
7835010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
784bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
785900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *InStep  = nullptr;
786900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutStep = nullptr;
787db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
788db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Construct the actual function body.
789bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
790db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
791cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Collect and construct the arguments for the kernel().
792db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Note that we load any loop-invariant arguments before entering the Loop.
793bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
794db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
795900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Type  *InTy      = nullptr;
796083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::Value *InBufPtr = nullptr;
797d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
798083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 0}));
799083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      llvm::LoadInst *InStepArg  = Builder.CreateLoad(
800083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"), "instep_addr");
801e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
802bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      InTy = (FunctionArgIter++)->getType();
803e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      InStep = getStepValue(&DL, InTy, InStepArg);
804e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
8052b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      InStep->setName("instep");
806e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
807083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices InputAddrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 0}));
808083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      InBufPtr = Builder.CreateLoad(
809083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        Builder.CreateInBoundsGEP(Arg_p, InputAddrGEP, "input_buf.gep"), "input_buf");
810db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
811db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
812900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Type *OutTy = nullptr;
813900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutBasePtr = nullptr;
814d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
815bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      OutTy = (FunctionArgIter++)->getType();
816b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
8172b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      OutStep->setName("outstep");
818083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
819083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
820db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
821db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
822900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *UsrData = nullptr;
823d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
824bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
825083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      llvm::Value *UsrDataPointerAddr = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldUsr);
826083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(UsrDataPointerAddr), UsrDataTy);
827db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      UsrData->setName("UsrData");
828db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
829db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
830083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
83133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    llvm::PHINode *IV;
83233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    createLoop(Builder, Arg_x1, Arg_x2, &IV);
833097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
83433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
83528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    const int CalleeArgsContextIdx = ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
836083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                                                            [&FunctionArgIter]() { FunctionArgIter++; },
837083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                                                            LoopHeader->getTerminator());
838db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
839bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(FunctionArgIter == Function->arg_end());
840db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
841cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Populate the actual call to kernel().
842db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::SmallVector<llvm::Value*, 8> RootArgs;
843db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
844900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *InPtr  = nullptr;
845900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutPtr = nullptr;
846db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
847ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    // Calculate the current input and output pointers
84802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    //
849ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    // We always calculate the input/output pointers with a GEP operating on i8
85002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // values and only cast at the very end to OutTy. This is because the step
85102f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // between two values is given in bytes.
85202f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    //
85302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // TODO: We could further optimize the output by using a GEP operation of
85402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // type 'OutTy' in cases where the element type of the allocation allows.
85502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    if (OutBasePtr) {
85602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
85702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      OutOffset = Builder.CreateMul(OutOffset, OutStep);
858083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutPtr = Builder.CreateInBoundsGEP(OutBasePtr, OutOffset);
85902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
86002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    }
861bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
862083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (InBufPtr) {
863ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
864ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      InOffset = Builder.CreateMul(InOffset, InStep);
865083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      InPtr = Builder.CreateInBoundsGEP(InBufPtr, InOffset);
866ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      InPtr = Builder.CreatePointerCast(InPtr, InTy);
867ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    }
86802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser
869ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    if (InPtr) {
8707ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      RootArgs.push_back(InPtr);
871db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
872db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
87302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    if (OutPtr) {
8747ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      RootArgs.push_back(OutPtr);
875db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
876db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
877db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (UsrData) {
878db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      RootArgs.push_back(UsrData);
879db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
880db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
88128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
882db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
883bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    Builder.CreateCall(Function, RootArgs);
884db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
8857ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    return true;
8867ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines  }
8877ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
8884e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  /* Expand a pass-by-value foreach kernel.
8897ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines   */
8904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  bool ExpandForEach(llvm::Function *Function, uint32_t Signature) {
891d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
892bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
8937ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
8944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // TODO: Refactor this to share functionality with ExpandOldStyleForEach.
895bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::DataLayout DL(Module);
8967ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
897bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
8984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      createEmptyExpandedForEachKernel(Function->getName());
8997ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
900bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    /*
901bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * Extract the expanded function's parameters.  It is guaranteed by
902e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross     * createEmptyExpandedForEachKernel that there will be four parameters.
903bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     */
904881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
9054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
906881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
907bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator ExpandedFunctionArgIter =
908bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      ExpandedFunction->arg_begin();
909bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
910bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
911bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
912bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
9133bc475b206c3fa249a212b90fe989fdcda4d75f9Matt Wala    // Arg_outstep is not used by expanded new-style forEach kernels.
9147ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
9157ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // Construct the actual function body.
916bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
9177ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
91818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Create TBAA meta-data.
919354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
920354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines                 *TBAAAllocation, *TBAAPointer;
921bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDBuilder MDHelper(*Context);
92214588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien
923354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScriptDistinct =
9244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      MDHelper.createTBAARoot(kRenderScriptTBAARootName);
9254e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
926354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines        TBAARenderScriptDistinct);
927e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
928e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                       TBAARenderScript);
929e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
930e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                      TBAAAllocation, 0);
931e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
932e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                    TBAARenderScript);
93314588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
93418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
935881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    /*
936881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     * Collect and construct the arguments for the kernel().
937881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     *
938881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     * Note that we load any loop-invariant arguments before entering the Loop.
939881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     */
940083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    size_t NumRemainingInputs = Function->arg_size();
9417ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
942881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // No usrData parameter on kernels.
943881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    bccAssert(
944881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
945881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
946881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    llvm::Function::arg_iterator ArgIter = Function->arg_begin();
947881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
948881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Check the return type
949bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Type     *OutTy            = nullptr;
950bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::LoadInst *OutBasePtr       = nullptr;
951bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Value    *CastedOutBasePtr = nullptr;
952881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
953e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    bool PassOutByPointer = false;
954881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
955d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
956bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *OutBaseTy = Function->getReturnType();
957881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
95874a4b08235990916911b8fe758d656c1171faf26Stephen Hines      if (OutBaseTy->isVoidTy()) {
959e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes        PassOutByPointer = true;
960881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        OutTy = ArgIter->getType();
961881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
962881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        ArgIter++;
963083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        --NumRemainingInputs;
96474a4b08235990916911b8fe758d656c1171faf26Stephen Hines      } else {
96574a4b08235990916911b8fe758d656c1171faf26Stephen Hines        // We don't increment Args, since we are using the actual return type.
966881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        OutTy = OutBaseTy->getPointerTo();
96774a4b08235990916911b8fe758d656c1171faf26Stephen Hines      }
968881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
969083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
970083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
971097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
9729c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      if (gEnableRsTbaa) {
9739c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines        OutBasePtr->setMetadata("tbaa", TBAAPointer);
9749c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
97550f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray
976bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray      CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
97774a4b08235990916911b8fe758d656c1171faf26Stephen Hines    }
97874a4b08235990916911b8fe758d656c1171faf26Stephen Hines
979083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
980d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala    llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
981881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
982083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    bccAssert(NumRemainingInputs <= RS_KERNEL_INPUT_LIMIT);
983881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
984083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // Create the loop structure.
985083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
986083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::PHINode *IV;
987083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    createLoop(Builder, Arg_x1, Arg_x2, &IV);
988881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
989083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
990083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    const int CalleeArgsContextIdx =
991083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
992083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             [&NumRemainingInputs]() { --NumRemainingInputs; },
993083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             LoopHeader->getTerminator());
994083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
995083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // After ExpandSpecialArguments() gets called, NumRemainingInputs
996083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // counts the number of arguments to the kernel that correspond to
997083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // an array entry from the InPtr field of the DriverInfo
998083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // structure.
999083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    const size_t NumInPtrArguments = NumRemainingInputs;
1000083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
1001083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (NumInPtrArguments > 0) {
1002e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, ArgIter, NumInPtrArguments,
1003e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                InBufPtrs, InStructTempSlots);
1004881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    }
10057ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
10067ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // Populate the actual call to kernel().
10077ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    llvm::SmallVector<llvm::Value*, 8> RootArgs;
10087ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
10099296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala    // Calculate the current input and output pointers.
1010881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1011881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Output
1012881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1013900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutPtr = nullptr;
1014bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    if (CastedOutBasePtr) {
10157b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
1016083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset);
1017bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
1018e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      if (PassOutByPointer) {
1019881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        RootArgs.push_back(OutPtr);
1020881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes      }
10214102bec56151fb5d9c962fb298412f34a6eacaa8Tobias Grosser    }
10227b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser
1023881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Inputs
102474a4b08235990916911b8fe758d656c1171faf26Stephen Hines
1025083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (NumInPtrArguments > 0) {
1026e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInPtrArguments,
1027e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                       InBufPtrs, InStructTempSlots, IV, RootArgs);
10287ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
10297ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
103028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
10317ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1032bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
10337ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1034e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    if (OutPtr && !PassOutByPointer) {
10359296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala      RetVal->setName("call.result");
103618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
10379c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      if (gEnableRsTbaa) {
10389c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines        Store->setMetadata("tbaa", TBAAAllocation);
10399c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
10407ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
10417ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1042db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return true;
1043db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1044db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1045e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Expand a simple reduce-style kernel function.
10464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // The input is a kernel which represents a binary operation,
10484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // of the form
10494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   define foo @func(foo %a, foo %b),
10514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // (More generally, it can be of the forms
10534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   define void @func(foo* %ret, foo* %a, foo* %b)
10554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   define void @func(foo* %ret, foo1 %a, foo1 %b)
10564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   define foo1 @func(foo2 %a, foo2 %b)
10574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // as a result of argument / return value conversions. Here, "foo1"
10594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // and "foo2" refer to possibly coerced types, and the coerced
10604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // argument type may be different from the coerced return type. See
10614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // "Note on coercion" below.)
10624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // Note also, we do not expect to encounter any case when the
10644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // arguments are promoted to pointers but the return value is
10654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // unpromoted to pointer, e.g.
10664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10674e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   define foo1 @func(foo* %a, foo* %b)
10684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // and we will throw an assertion in this case.)
10704e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10714e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // The input kernel gets expanded into a kernel of the form
10724e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   define void @func.expand(i8* %inBuf, i8* outBuf, i32 len)
10744e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10754e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // which performs a serial reduction of `len` elements from `inBuf`,
10764e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // and stores the result into `outBuf`. In pseudocode, @func.expand
10774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // does:
10784e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10794e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   inArr := (foo *)inBuf;
10804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   accum := inArr[0];
10814e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   for (i := 1; i < len; ++i) {
10824e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //     accum := foo(accum, inArr[i]);
10834e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   }
10844e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   *(foo *)outBuf := accum;
10854e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10864e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // Note on coercion
10874e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10884e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // Both the return value and the argument types may undergo internal
10894e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // coercion in clang as part of call lowering. As a result, the
10904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // return value type may differ from the argument type even if the
10914e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // types in the RenderScript signaure are the same. For instance, the
10924e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // kernel
10934e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   int3 add(int3 a, int3 b) { return a + b; }
10954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10964e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // gets lowered by clang as
10974e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   define <3 x i32> @add(<4 x i32> %a.coerce, <4 x i32> %b.coerce)
10994e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
11004e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // under AArch64. The details of this process are found in clang,
11014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // lib/CodeGen/TargetInfo.cpp, under classifyArgumentType() and
11024e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // classifyReturnType() in ARMABIInfo, AArch64ABIInfo. If the value
11034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // is passed by pointer, then the pointed-to type is not coerced.
11044e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
11054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // Since we lack the original type information, this code does loads
11064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // and stores of allocation data by way of pointers to the coerced
11074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // type.
11084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  bool ExpandReduce(llvm::Function *Function) {
11094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(Function);
11104e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
1111e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    ALOGV("Expanding simple reduce kernel %s", Function->getName().str().c_str());
11124e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11134e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::DataLayout DL(Module);
11144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11154e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // TBAA Metadata
11164e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, *TBAAAllocation;
11174e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::MDBuilder MDHelper(*Context);
11184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11194e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    TBAARenderScriptDistinct =
11204e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      MDHelper.createTBAARoot(kRenderScriptTBAARootName);
11214e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
11224e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        TBAARenderScriptDistinct);
11234e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
11244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                                                       TBAARenderScript);
11254e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
11264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                                                      TBAAAllocation, 0);
11274e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11284e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Function *ExpandedFunction =
11294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      createEmptyExpandedReduceKernel(Function->getName());
11304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Extract the expanded kernel's parameters.  It is guaranteed by
1132e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // createEmptyExpandedReduceKernel that there will be 3 parameters.
11334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    auto ExpandedFunctionArgIter = ExpandedFunction->arg_begin();
11344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *Arg_inBuf  = &*(ExpandedFunctionArgIter++);
11364e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *Arg_outBuf = &*(ExpandedFunctionArgIter++);
11374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *Arg_len    = &*(ExpandedFunctionArgIter++);
11384e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(Function->arg_size() == 2 || Function->arg_size() == 3);
11404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Check if, instead of returning a value, the original kernel has
11424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // a pointer parameter which points to a temporary buffer into
11434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // which the return value gets written.
11444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    const bool ReturnValuePointerStyle = (Function->arg_size() == 3);
11454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(Function->getReturnType()->isVoidTy() == ReturnValuePointerStyle);
11464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Check if, instead of being passed by value, the inputs to the
11484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // original kernel are passed by pointer.
11494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    auto FirstArgIter = Function->arg_begin();
11504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // The second argument is always an input to the original kernel.
11514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    auto SecondArgIter = std::next(FirstArgIter);
11524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    const bool InputsPointerStyle = SecondArgIter->getType()->isPointerTy();
11534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Get the output type (i.e. return type of the original kernel).
11554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::PointerType *OutPtrTy = nullptr;
11564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Type *OutTy = nullptr;
11574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (ReturnValuePointerStyle) {
11584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      OutPtrTy = llvm::dyn_cast<llvm::PointerType>(FirstArgIter->getType());
11594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      bccAssert(OutPtrTy && "Expected a pointer parameter to kernel");
11604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      OutTy = OutPtrTy->getElementType();
11614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    } else {
11624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      OutTy = Function->getReturnType();
11634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      bccAssert(!OutTy->isVoidTy());
11644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      OutPtrTy = OutTy->getPointerTo();
11654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
11664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11674e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Get the input type (type of the arguments to the original
11684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // kernel). Some input types are different from the output type,
11694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // due to explicit coercion that the compiler performs when
11704e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // lowering the parameters. See "Note on coercion" above.
11714e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::PointerType *InPtrTy;
11724e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Type *InTy;
11734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (InputsPointerStyle) {
11744e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      InPtrTy = llvm::dyn_cast<llvm::PointerType>(SecondArgIter->getType());
11754e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      bccAssert(InPtrTy && "Expected a pointer parameter to kernel");
11764e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      bccAssert(ReturnValuePointerStyle);
11774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      bccAssert(std::next(SecondArgIter)->getType() == InPtrTy &&
11784e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                "Input type mismatch");
11794e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      InTy = InPtrTy->getElementType();
11804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    } else {
11814e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      InTy = SecondArgIter->getType();
11824e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      InPtrTy = InTy->getPointerTo();
11834e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      if (!ReturnValuePointerStyle) {
11844e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        bccAssert(InTy == FirstArgIter->getType() && "Input type mismatch");
11854e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      } else {
11864e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        bccAssert(InTy == std::next(SecondArgIter)->getType() &&
11874e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                  "Input type mismatch");
11884e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      }
11894e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
11904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11914e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // The input type should take up the same amount of space in
11924e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // memory as the output type.
11934e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(DL.getTypeAllocSize(InTy) == DL.getTypeAllocSize(OutTy));
11944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Construct the actual function body.
11964e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
11974e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Cast input and output buffers to appropriate types.
11994e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *InBuf = Builder.CreatePointerCast(Arg_inBuf, InPtrTy);
12004e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *OutBuf = Builder.CreatePointerCast(Arg_outBuf, OutPtrTy);
12014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12024e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Create a slot to pass temporary results back. This needs to be
12034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // separate from the accumulator slot because the kernel may mark
12044e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // the return value slot as noalias.
12054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *ReturnBuf = nullptr;
12064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (ReturnValuePointerStyle) {
12074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      ReturnBuf = Builder.CreateAlloca(OutTy, nullptr, "ret.tmp");
12084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
12094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12104e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Create a slot to hold the second input if the inputs are passed
12114e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // by pointer to the original kernel. We cannot directly pass a
12124e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // pointer to the input buffer, because the kernel may modify its
12134e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // inputs.
12144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *SecondInputTempBuf = nullptr;
12154e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (InputsPointerStyle) {
12164e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      SecondInputTempBuf = Builder.CreateAlloca(InTy, nullptr, "in.tmp");
12174e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
12184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12194e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Create a slot to accumulate temporary results, and fill it with
12204e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // the first value.
12214e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *AccumBuf = Builder.CreateAlloca(OutTy, nullptr, "accum");
12224e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Cast to OutPtrTy before loading, since AccumBuf has type OutPtrTy.
12234e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::LoadInst *FirstElementLoad = Builder.CreateLoad(
12244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      Builder.CreatePointerCast(InBuf, OutPtrTy));
12254e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (gEnableRsTbaa) {
12264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      FirstElementLoad->setMetadata("tbaa", TBAAAllocation);
12274e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
12284e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Memory operations with AccumBuf shouldn't be marked with
12294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // RenderScript TBAA, since this might conflict with TBAA metadata
12304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // in the kernel function when AccumBuf is passed by pointer.
12314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    Builder.CreateStore(FirstElementLoad, AccumBuf);
12324e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Loop body
12344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Create the loop structure. Note that the first input in the input buffer
12364e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // has already been accumulated, so that we start at index 1.
12374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::PHINode *IndVar;
12384e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *Start = llvm::ConstantInt::get(Arg_len->getType(), 1);
12394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::BasicBlock *Exit = createLoop(Builder, Start, Arg_len, &IndVar);
12404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *InputPtr = Builder.CreateInBoundsGEP(InBuf, IndVar, "next_input.gep");
12424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Set up arguments and call the original (unexpanded) kernel.
12444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    //
12454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // The original kernel can have at most 3 arguments, which is
12464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // achieved when the signature looks like:
12474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    //
12484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    //    define void @func(foo* %ret, bar %a, bar %b)
12494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    //
12504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // (bar can be one of foo/foo.coerce/foo*).
12514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::SmallVector<llvm::Value *, 3> KernelArgs;
12524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (ReturnValuePointerStyle) {
12544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      KernelArgs.push_back(ReturnBuf);
12554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
12564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (InputsPointerStyle) {
12584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      bccAssert(ReturnValuePointerStyle);
12594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      // Because the return buffer is copied back into the
12604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      // accumulator, it's okay if the accumulator is overwritten.
12614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      KernelArgs.push_back(AccumBuf);
12624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      llvm::LoadInst *InputLoad = Builder.CreateLoad(InputPtr);
12644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      if (gEnableRsTbaa) {
12654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        InputLoad->setMetadata("tbaa", TBAAAllocation);
12664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      }
12674e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      Builder.CreateStore(InputLoad, SecondInputTempBuf);
12684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      KernelArgs.push_back(SecondInputTempBuf);
12704e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    } else {
12714e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      // InPtrTy may be different from OutPtrTy (the type of
12724e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      // AccumBuf), so first cast the accumulator buffer to the
12734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      // pointer type corresponding to the input argument type.
12744e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      KernelArgs.push_back(
12754e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        Builder.CreateLoad(Builder.CreatePointerCast(AccumBuf, InPtrTy)));
12764e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      llvm::LoadInst *LoadedArg = Builder.CreateLoad(InputPtr);
12784e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      if (gEnableRsTbaa) {
12794e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        LoadedArg->setMetadata("tbaa", TBAAAllocation);
12804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      }
12814e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      KernelArgs.push_back(LoadedArg);
12824e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
12834e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12844e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *RetVal = Builder.CreateCall(Function, KernelArgs);
12854e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12864e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    const uint64_t ElementSize = DL.getTypeStoreSize(OutTy);
12874e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    const uint64_t ElementAlign = DL.getABITypeAlignment(OutTy);
12884e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12894e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Store the output in the accumulator.
12904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (ReturnValuePointerStyle) {
12914e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      Builder.CreateMemCpy(AccumBuf, ReturnBuf, ElementSize, ElementAlign);
12924e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    } else {
12934e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      Builder.CreateStore(RetVal, AccumBuf);
12944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
12954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12964e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Loop exit
12974e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    Builder.SetInsertPoint(Exit, Exit->begin());
12984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12994e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::LoadInst *OutputLoad = Builder.CreateLoad(AccumBuf);
13004e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::StoreInst *OutputStore = Builder.CreateStore(OutputLoad, OutBuf);
13014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (gEnableRsTbaa) {
13024e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      OutputStore->setMetadata("tbaa", TBAAAllocation);
13034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
13044e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
13054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    return true;
13064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  }
13074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
1308e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Certain categories of functions that make up a general
1309e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // reduce-style kernel are called directly from the driver with no
1310e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // expansion needed.  For a function in such a category, we need to
1311e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // promote linkage from static to external, to ensure that the
1312e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // function is visible to the driver in the dynamic symbol table.
1313e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // This promotion is safe because we don't have any kind of cross
1314e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // translation unit linkage model (except for linking against
1315e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // RenderScript libraries), so we do not risk name clashes.
1316e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  bool PromoteReduceNewFunction(const char *Name, FunctionSet &PromotedFunctions) {
1317e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    if (!Name)  // a presumably-optional function that is not present
1318e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      return false;
1319e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1320e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function *Fn = Module->getFunction(Name);
1321e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    bccAssert(Fn != nullptr);
1322e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    if (PromotedFunctions.insert(Fn).second) {
1323e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      bccAssert(Fn->getLinkage() == llvm::GlobalValue::InternalLinkage);
1324e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      Fn->setLinkage(llvm::GlobalValue::ExternalLinkage);
1325e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      return true;
1326e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
1327e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1328e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    return false;
1329e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
1330e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1331e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Expand the accumulator function for a general reduce-style kernel.
1332e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1333e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // The input is a function of the form
1334e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1335e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   define void @func(accumType* %accum, foo1 in1[, ... fooN inN] [, special arguments])
1336e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1337e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // where all arguments except the first are the same as for a foreach kernel.
1338e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1339e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // The input accumulator function gets expanded into a function of the form
1340e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1341e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   define void @func.expand(%RsExpandKernelDriverInfoPfx* %p, i32 %x1, i32 %x2, accumType* %accum)
1342e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1343e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // which performs a serial accumulation of elements [x1, x2) into *%accum.
1344e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1345e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // In pseudocode, @func.expand does:
1346e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1347e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   for (i = %x1; i < %x2; ++i) {
1348e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //     func(%accum,
1349e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //          *((foo1 *)p->inPtr[0] + i)[, ... *((fooN *)p->inPtr[N-1] + i)
1350e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //          [, p] [, i] [, p->current.y] [, p->current.z]);
1351e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   }
1352e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1353e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // This is very similar to foreach kernel expansion with no output.
1354e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  bool ExpandReduceNewAccumulator(llvm::Function *FnAccumulator, uint32_t Signature, size_t NumInputs) {
1355e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    ALOGV("Expanding accumulator %s for general reduce kernel",
1356e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross          FnAccumulator->getName().str().c_str());
1357e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1358e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Create TBAA meta-data.
1359e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
1360e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                 *TBAAAllocation, *TBAAPointer;
1361e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::MDBuilder MDHelper(*Context);
1362e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAARenderScriptDistinct =
1363e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      MDHelper.createTBAARoot(kRenderScriptTBAARootName);
1364e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
1365e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        TBAARenderScriptDistinct);
1366e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
1367e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                       TBAARenderScript);
1368e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
1369e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                      TBAAAllocation, 0);
1370e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
1371e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                    TBAARenderScript);
1372e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
1373e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1374e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    auto AccumulatorArgIter = FnAccumulator->arg_begin();
1375e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1376e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Create empty accumulator function.
1377e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function *FnExpandedAccumulator =
1378e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        createEmptyExpandedReduceNewAccumulator(FnAccumulator->getName(),
1379e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                (AccumulatorArgIter++)->getType());
1380e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1381e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Extract the expanded accumulator's parameters.  It is
1382e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // guaranteed by createEmptyExpandedReduceNewAccumulator that
1383e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // there will be 4 parameters.
1384e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceNewAccumulatorParams);
1385e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    auto ExpandedAccumulatorArgIter = FnExpandedAccumulator->arg_begin();
1386e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_p     = &*(ExpandedAccumulatorArgIter++);
1387e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_x1    = &*(ExpandedAccumulatorArgIter++);
1388e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_x2    = &*(ExpandedAccumulatorArgIter++);
1389e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_accum = &*(ExpandedAccumulatorArgIter++);
1390e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1391e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Construct the actual function body.
1392e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::IRBuilder<> Builder(FnExpandedAccumulator->getEntryBlock().begin());
1393e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1394e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Create the loop structure.
1395e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
1396e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::PHINode *IndVar;
1397e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    createLoop(Builder, Arg_x1, Arg_x2, &IndVar);
1398e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1399e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
1400e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    const int CalleeArgsContextIdx =
1401e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        ExpandSpecialArguments(Signature, IndVar, Arg_p, Builder, CalleeArgs,
1402e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                               [](){}, LoopHeader->getTerminator());
1403e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1404e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
1405e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
1406e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, AccumulatorArgIter, NumInputs,
1407e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                              InBufPtrs, InStructTempSlots);
1408e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1409e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Populate the actual call to the original accumulator.
1410e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> RootArgs;
1411e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    RootArgs.push_back(Arg_accum);
1412e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInputs, InBufPtrs, InStructTempSlots,
1413e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                     IndVar, RootArgs);
1414e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *FnAccumulator, Builder);
1415e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.CreateCall(FnAccumulator, RootArgs);
1416e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1417e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    return true;
1418e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
1419e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
142018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// @brief Checks if pointers to allocation internals are exposed
142118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  ///
142218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// This function verifies if through the parameters passed to the kernel
142318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// or through calls to the runtime library the script gains access to
142418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// pointers pointing to data within a RenderScript Allocation.
142518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// If we know we control all loads from and stores to data within
142618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// RenderScript allocations and if we know the run-time internal accesses
142718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// are all annotated with RenderScript TBAA metadata, only then we
142818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// can safely use TBAA to distinguish between generic and from-allocation
142918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// pointers.
1430bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  bool allocPointersExposed(llvm::Module &Module) {
143118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Old style kernel function can expose pointers to elements within
143218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // allocations.
143318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // TODO: Extend analysis to allow simple cases of old-style kernels.
143425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    for (size_t i = 0; i < mExportForEachCount; ++i) {
143525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      const char *Name = mExportForEachNameList[i];
143625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      uint32_t Signature = mExportForEachSignatureList[i];
1437bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (Module.getFunction(Name) &&
1438d88177580db4ddedf680854c51db333c97eabc59Stephen Hines          !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
143918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
144018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
144118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
144218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
144318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Check for library functions that expose a pointer to an Allocation or
144418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // that are not yet annotated with RenderScript-specific tbaa information.
1445e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    static const std::vector<const char *> Funcs{
1446e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAt(...)
1447e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationj",
1448e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationjj",
1449e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationjjj",
1450e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1451e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsSetElementAt()
1452e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvj",
1453e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvjj",
1454e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvjjj",
1455e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1456e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_Y()
1457e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj",
1458e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1459e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_U()
1460e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj",
1461e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1462e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_V()
1463e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj",
1464e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    };
1465e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1466e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    for (auto FI : Funcs) {
1467e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      llvm::Function *Function = Module.getFunction(FI);
146818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1469bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (!Function) {
1470e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala        ALOGE("Missing run-time function '%s'", FI);
147118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
147218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
147318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1474bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (Function->getNumUses() > 0) {
147518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
147618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
147718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
147818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
147918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    return false;
148018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  }
148118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
148218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
148318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  ///
148418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// The TBAA metadata used to annotate loads/stores from RenderScript
1485e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// Allocations is generated in a separate TBAA tree with a
1486354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for
1487354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// all nodes in unrelated alias analysis trees. This function makes the
1488354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root),
1489e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With
1490e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// the connected trees every access to an Allocation is resolved to
1491e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// must-alias if compared to a normal C/C++ access.
1492bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
1493bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDBuilder MDHelper(*Context);
1494354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScriptDistinct =
1495354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines      MDHelper.createTBAARoot("RenderScript Distinct TBAA");
1496354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode(
1497354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines        "RenderScript TBAA", TBAARenderScriptDistinct);
1498bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
1499354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScript->replaceOperandWith(1, TBAARoot);
150018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  }
150118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1502bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  virtual bool runOnModule(llvm::Module &Module) {
1503bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bool Changed  = false;
1504bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    this->Module  = &Module;
15054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    Context = &Module.getContext();
1506bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
15074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    buildTypes();
1508bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
1509bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bcinfo::MetadataExtractor me(&Module);
151025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    if (!me.extract()) {
151125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      ALOGE("Could not extract metadata from module!");
151225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      return false;
151325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    }
15144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
15154e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Expand forEach_* style kernels.
151625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachCount = me.getExportForEachSignatureCount();
151725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachNameList = me.getExportForEachNameList();
151825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachSignatureList = me.getExportForEachSignatureList();
1519db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
152025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    for (size_t i = 0; i < mExportForEachCount; ++i) {
152125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      const char *name = mExportForEachNameList[i];
152225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      uint32_t signature = mExportForEachSignatureList[i];
1523bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Function *kernel = Module.getFunction(name);
1524cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser      if (kernel) {
1525d88177580db4ddedf680854c51db333c97eabc59Stephen Hines        if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
15264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala          Changed |= ExpandForEach(kernel, signature);
1527acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1528acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        } else if (kernel->getReturnType()->isVoidTy()) {
15294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala          Changed |= ExpandOldStyleForEach(kernel, signature);
1530acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1531acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        } else {
1532acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // There are some graphics root functions that are not
1533acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // expanded, but that will be called directly. For those
1534acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // functions, we can not set the linkage to internal.
1535acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        }
1536cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines      }
1537db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
1538db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1539e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Expand simple reduce_* style kernels.
15404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    mExportReduceCount = me.getExportReduceCount();
15414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    mExportReduceNameList = me.getExportReduceNameList();
15424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
15434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    for (size_t i = 0; i < mExportReduceCount; ++i) {
15444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      llvm::Function *kernel = Module.getFunction(mExportReduceNameList[i]);
15454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      if (kernel) {
15464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        Changed |= ExpandReduce(kernel);
15474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      }
15484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
15494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
1550e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Process general reduce_* style functions.
1551e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    const size_t ExportReduceNewCount = me.getExportReduceNewCount();
1552e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    const bcinfo::MetadataExtractor::ReduceNew *ExportReduceNewList = me.getExportReduceNewList();
1553e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    //   Note that functions can be shared between kernels
1554e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    FunctionSet PromotedFunctions, ExpandedAccumulators;
1555e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1556e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    for (size_t i = 0; i < ExportReduceNewCount; ++i) {
1557e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mInitializerName, PromotedFunctions);
1558e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      Changed |= PromoteReduceNewFunction(ExportReduceNewList[i].mOutConverterName, PromotedFunctions);
1559e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1560e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      // Accumulator
1561e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::Function *accumulator = Module.getFunction(ExportReduceNewList[i].mAccumulatorName);
1562e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      bccAssert(accumulator != nullptr);
1563e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (ExpandedAccumulators.insert(accumulator).second)
1564e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        Changed |= ExpandReduceNewAccumulator(accumulator,
1565e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                              ExportReduceNewList[i].mSignature,
1566e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                              ExportReduceNewList[i].mInputCount);
1567e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
1568e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
15694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (gEnableRsTbaa && !allocPointersExposed(Module)) {
1570bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      connectRenderScriptTBAAMetadata(Module);
157118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
157218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1573cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    return Changed;
1574db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1575db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1576db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  virtual const char *getPassName() const {
15774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    return "forEach_* and reduce_* function expansion";
1578db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1579db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
15804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala}; // end RSKernelExpandPass
1581db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
15827a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace
15837a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
15844e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walachar RSKernelExpandPass::ID = 0;
15854e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walastatic llvm::RegisterPass<RSKernelExpandPass> X("kernelexp", "Kernel Expand Pass");
1586db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1587db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace bcc {
1588db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
15897a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass *
15904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt WalacreateRSKernelExpandPass(bool pEnableStepOpt) {
15914e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  return new RSKernelExpandPass(pEnableStepOpt);
15927a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao}
1593db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
15947a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc
1595