1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/*
2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project
3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License");
5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License.
6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at
7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *     http://www.apache.org/licenses/LICENSE-2.0
9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software
11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS,
12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and
14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License.
15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */
16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
176e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines#include "bcc/Assert.h"
18e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSTransforms.h"
1957fd9f882f3359be4201c42b02aebf785d311df2David Gross#include "bcc/Renderscript/RSUtils.h"
207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
217a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib>
2233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross#include <functional>
23e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross#include <unordered_set>
247a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
25b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DerivedTypes.h>
26b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Function.h>
27b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Instructions.h>
28b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/IRBuilder.h>
2918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser#include <llvm/IR/MDBuilder.h>
30b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Module.h>
31c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h>
327ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines#include <llvm/Support/raw_ostream.h>
33b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DataLayout.h>
34cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser#include <llvm/IR/Function.h>
35b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Type.h>
36806075b3a54af826fea78490fb213d8a0784138eTobias Grosser#include <llvm/Transforms/Utils/BasicBlockUtils.h>
37c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang
38c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include "bcc/Config/Config.h"
39ef73a242762bcd8113b9b65ceccbe7d909b5acbcZonr Chang#include "bcc/Support/Log.h"
40db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
41d88177580db4ddedf680854c51db333c97eabc59Stephen Hines#include "bcinfo/MetadataExtractor.h"
42d88177580db4ddedf680854c51db333c97eabc59Stephen Hines
434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala#ifndef __DISABLE_ASSERTS
444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala// Only used in bccAssert()
454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst int kNumExpandedForeachParams = 4;
46a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Grossconst int kNumExpandedReduceAccumulatorParams = 4;
474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala#endif
484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst char kRenderScriptTBAARootName[] = "RenderScript Distinct TBAA";
504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst char kRenderScriptTBAANodeName[] = "RenderScript TBAA";
51bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
527a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc;
537a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
54db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace {
557a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
56354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hinesstatic const bool gEnableRsTbaa = true;
579c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines
584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala/* RSKernelExpandPass - This pass operates on functions that are able
594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * to be called via rsForEach(), "foreach_<NAME>", or
604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * "reduce_<NAME>". We create an inner loop for the function to be
614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * invoked over the appropriate data cells of the input/output
624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * allocations (adjusting other relevant parameters as we go). We
634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * support doing this for any forEach or reduce style compute
644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * kernels. The new function name is the original function name
654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * followed by ".expand". Note that we still generate code for the
664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * original function.
677a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */
684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaclass RSKernelExpandPass : public llvm::ModulePass {
6933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grosspublic:
70db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  static char ID;
71db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
7233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossprivate:
73e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h
74e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
75e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  typedef std::unordered_set<llvm::Function *> FunctionSet;
76e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
77e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  enum RsLaunchDimensionsField {
78e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldX,
79e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldY,
80e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldZ,
81e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldLod,
82e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldFace,
83e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldArray,
84e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
85e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldCount
86e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  };
87e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
88e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  enum RsExpandKernelDriverInfoPfxField {
89e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldInPtr,
90e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldInStride,
91e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldInLen,
92e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldOutPtr,
93e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldOutStride,
94e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldOutLen,
95e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldDim,
96e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldCurrent,
97e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldUsr,
98e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldUsLenr,
99e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
100e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldCount
101e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  };
10233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
103bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::Module *Module;
104bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::LLVMContext *Context;
105bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
  /*
   * Pointers to LLVM type information for the function signatures
   * for expanded functions. These must be re-calculated for each module
   * the pass is run on.
   */
111a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross  llvm::FunctionType *ExpandedForEachType;
112e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  llvm::Type *RsExpandKernelDriverInfoPfxTy;
113db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
11425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  uint32_t mExportForEachCount;
11525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  const char **mExportForEachNameList;
11625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  const uint32_t *mExportForEachSignatureList;
117cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines
1182b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // Turns on optimization of allocation stride values.
1192b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  bool mEnableStepOpt;
1202b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
121bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  uint32_t getRootSignature(llvm::Function *Function) {
122db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    const llvm::NamedMDNode *ExportForEachMetadata =
123bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes        Module->getNamedMetadata("#rs_export_foreach");
124db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
125db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (!ExportForEachMetadata) {
126db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
127bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      for (llvm::Function::arg_iterator B = Function->arg_begin(),
128bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                                        E = Function->arg_end();
129db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           B != E;
130db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           ++B) {
131db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        RootArgTys.push_back(B->getType());
132db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
133db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
134db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // For pre-ICS bitcode, we may not have signature information. In that
135db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // case, we use the size of the RootArgTys to select the number of
136db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // arguments.
137db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      return (1 << RootArgTys.size()) - 1;
138db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
139db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1407ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    if (ExportForEachMetadata->getNumOperands() == 0) {
1417ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      return 0;
1427ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
1437ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1446e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
145db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
146cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // We only handle the case for legacy root() functions here, so this is
147cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // hard-coded to look at only the first such function.
148db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
149900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    if (SigNode != nullptr && SigNode->getNumOperands() == 1) {
1501bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines      llvm::Metadata *SigMD = SigNode->getOperand(0);
1511bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines      if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) {
1521bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines        llvm::StringRef SigString = SigS->getString();
153db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        uint32_t Signature = 0;
154db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        if (SigString.getAsInteger(10, Signature)) {
155db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
156db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          return 0;
157db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        }
158db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        return Signature;
159db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
160db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
161db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
162db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return 0;
163db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
164db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
165429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray  bool isStepOptSupported(llvm::Type *AllocType) {
166429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
167429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
168429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
169429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
170429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (mEnableStepOpt) {
171429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
172429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
173429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
174429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType == VoidPtrTy) {
175429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
176429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
177429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
178429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (!PT) {
179429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
180429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
181429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
182429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // remaining conditions are 64-bit only
183429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (VoidPtrTy->getPrimitiveSizeInBits() == 32) {
184429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return true;
185429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
186429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
187429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // coerce suggests an upconverted struct type, which we can't support
188429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) {
189429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
190429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
191429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
192429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported
193429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2);
194429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128);
195429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType == V2xi64Ty || AllocType == Int128Ty) {
196429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
197429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
198429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
199429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    return true;
200429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray  }
201429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
2022b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // Get the actual value we should use to step through an allocation.
2037b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  //
2047b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // Normally the value we use to step through an allocation is given to us by
2057b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // the driver. However, for certain primitive data types, we can derive an
2067b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // integer constant for the step value. We use this integer constant whenever
2077b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // possible to allow further compiler optimizations to take place.
2087b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  //
209b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines  // DL - Target Data size/layout information.
2102b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // T - Type of allocation (should be a pointer).
2112b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // OrigStep - Original step increment (root.expand() input from driver).
212bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
2132b04086acbef6520ae2c54a868b1271abf053122Stephen Hines                            llvm::Value *OrigStep) {
214b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines    bccAssert(DL);
215bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(AllocType);
2162b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    bccAssert(OrigStep);
217bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
218429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (isStepOptSupported(AllocType)) {
2192b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      llvm::Type *ET = PT->getElementType();
220b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      uint64_t ETSize = DL->getTypeAllocSize(ET);
221bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
2222b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return llvm::ConstantInt::get(Int32Ty, ETSize);
2232b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    } else {
2242b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return OrigStep;
2252b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    }
2262b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  }
2272b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
228097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes  /// Builds the types required by the pass for the given context.
229bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  void buildTypes(void) {
230e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs.
231bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
232e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8Ty                   = llvm::Type::getInt8Ty(*Context);
233e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8PtrTy                = Int8Ty->getPointerTo();
234e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT);
235e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32Ty                  = llvm::Type::getInt32Ty(*Context);
236e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32ArrayInputLimitTy   = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT);
237e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *VoidPtrTy                = llvm::Type::getInt8PtrTy(*Context);
238e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32Array4Ty            = llvm::ArrayType::get(Int32Ty, 4);
239097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
240097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes    /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h:
241db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *
242e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * struct RsLaunchDimensions {
243e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t x;
244db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t y;
245db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t z;
246e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t lod;
247e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t face;
248e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t array[4];
249e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * };
250e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     */
251e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes;
252e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t x
253e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t y
254e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t z
255e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t lod
256e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t face
257e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4]
258e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::StructType *RsLaunchDimensionsTy =
259e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions");
260e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
2611d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross    /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h:
262e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
263e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * struct RsExpandKernelDriverInfoPfx {
264e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT];
265e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t inStride[RS_KERNEL_INPUT_LIMIT];
266e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t inLen;
267e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
268e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT];
269e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t outStride[RS_KERNEL_INPUT_LIMIT];
270e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t outLen;
271e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
272e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // Dimension of the launch
273e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     RsLaunchDimensions dim;
274e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
275e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // The walking iterator of the launch
276e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     RsLaunchDimensions current;
277e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
278e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     const void *usr;
279e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t usrLen;
280e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
281e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // Items below this line are not used by the compiler and can be change in the driver.
282e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // So the compiler must assume there are an unknown number of fields of unknown type
283e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // beginning here.
284db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     * };
2851d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross     *
2861d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross     * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp).
287db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     */
288e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes;
289e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]
290e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t inStride[RS_KERNEL_INPUT_LIMIT]
291e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t inLen
292e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]
293e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t outStride[RS_KERNEL_INPUT_LIMIT]
294e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t outLen
295e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions dim
296e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions current
297e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy);                // const void *usr
298e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t usrLen
299e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    RsExpandKernelDriverInfoPfxTy =
300e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx");
301bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
302bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    // Create the function type for expanded kernels.
3034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
304bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
305e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo();
3064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // void (const RsExpandKernelDriverInfoPfxTy *p, uint32_t x1, uint32_t x2, uint32_t outstep)
3074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    ExpandedForEachType = llvm::FunctionType::get(VoidTy,
3084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        {RsExpandKernelDriverInfoPfxPtrTy, Int32Ty, Int32Ty, Int32Ty}, false);
3098ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  }
3108ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
3114e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  /// @brief Create skeleton of the expanded foreach kernel.
312357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///
313357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  /// This creates a function with the following signature:
314357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///
315357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
3165010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes  ///         uint32_t outstep)
317357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///
3184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  llvm::Function *createEmptyExpandedForEachKernel(llvm::StringRef OldName) {
319bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
3204e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      llvm::Function::Create(ExpandedForEachType,
321bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                             llvm::GlobalValue::ExternalLinkage,
322bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                             OldName + ".expand", Module);
3234e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
324bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
325bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("p");
326bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("x1");
327bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("x2");
328bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("arg_outstep");
3294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
3304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                                                       ExpandedFunction);
3314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::IRBuilder<> Builder(Begin);
3324e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    Builder.CreateRetVoid();
3334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    return ExpandedFunction;
3344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  }
3354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
336e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Create skeleton of a general reduce kernel's expanded accumulator.
337e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
338e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // This creates a function with the following signature:
339e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
340e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //  void @func.expand(%RsExpandKernelDriverInfoPfx* nocapture %p,
341e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                    i32 %x1, i32 %x2, accumType* nocapture %accum)
342e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
343a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross  llvm::Function *createEmptyExpandedReduceAccumulator(llvm::StringRef OldName,
344a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross                                                       llvm::Type *AccumArgTy) {
345e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
346e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
347a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross    llvm::FunctionType *ExpandedReduceAccumulatorType =
348e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        llvm::FunctionType::get(VoidTy,
349e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                {RsExpandKernelDriverInfoPfxTy->getPointerTo(),
350e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 Int32Ty, Int32Ty, AccumArgTy}, false);
351e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function *FnExpandedAccumulator =
352a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross      llvm::Function::Create(ExpandedReduceAccumulatorType,
353e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                             llvm::GlobalValue::ExternalLinkage,
354e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                             OldName + ".expand", Module);
355a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross    bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams);
356e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
357e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function::arg_iterator AI = FnExpandedAccumulator->arg_begin();
358e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
359e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    using llvm::Attribute;
360e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
361e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_p = &(*AI++);
362e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_p->setName("p");
363e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_p->addAttr(llvm::AttributeSet::get(*Context, Arg_p->getArgNo() + 1,
364e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                           llvm::makeArrayRef(Attribute::NoCapture)));
365e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
366e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_x1 = &(*AI++);
367e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_x1->setName("x1");
368e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
369e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_x2 = &(*AI++);
370e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_x2->setName("x2");
371e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
372e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_accum = &(*AI++);
373e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_accum->setName("accum");
374e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_accum->addAttr(llvm::AttributeSet::get(*Context, Arg_accum->getArgNo() + 1,
375e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                               llvm::makeArrayRef(Attribute::NoCapture)));
376e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
377e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
378e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                       FnExpandedAccumulator);
379e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::IRBuilder<> Builder(Begin);
380e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.CreateRetVoid();
381e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
382e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    return FnExpandedAccumulator;
383e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
384e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
385e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @brief Create an empty loop
386e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
387e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// Create a loop of the form:
388e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
389e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// for (i = LowerBound; i < UpperBound; i++)
390e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///   ;
391e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
392e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// After the loop has been created, the builder is set such that
393e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// instructions can be added to the loop body.
394e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
395e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param Builder The builder to use to build this loop. The current
396e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///                position of the builder is the position the loop
397e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///                will be inserted.
398e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param LowerBound The first value of the loop iterator
399e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param UpperBound The maximal value of the loop iterator
400e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param LoopIV A reference that will be set to the loop iterator.
401e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @return The BasicBlock that will be executed after the loop.
402e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
403e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::Value *LowerBound,
404e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::Value *UpperBound,
405ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo                               llvm::Value **LoopIV) {
406c2ca742d7d0197c52e49467862844463fb42280fDavid Gross    bccAssert(LowerBound->getType() == UpperBound->getType());
407e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
408e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
409ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    llvm::Value *Cond, *IVNext, *IV, *IVVar;
410e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
411e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    CondBB = Builder.GetInsertBlock();
4128e9089377848628813a697b972773e969b942c3bPirama Arumuga Nainar    AfterBB = llvm::SplitBlock(CondBB, &*Builder.GetInsertPoint(), nullptr, nullptr);
413bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
414e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
415ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    CondBB->getTerminator()->eraseFromParent();
416ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    Builder.SetInsertPoint(CondBB);
417ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo
418ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    // decltype(LowerBound) *ivvar = alloca(sizeof(int))
419ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    // *ivvar = LowerBound
420ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    IVVar = Builder.CreateAlloca(LowerBound->getType(), nullptr, BCC_INDEX_VAR_NAME);
421ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    Builder.CreateStore(LowerBound, IVVar);
422ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo
423e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // if (LowerBound < Upperbound)
424e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto LoopHeader
425e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // else
426e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto AfterBB
427e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser    Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
428e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
429e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
430ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    // LoopHeader:
431ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    //   iv = *ivvar
432ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    //   <insertion point here>
433ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    //   iv.next = iv + 1
434ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    //   *ivvar = iv.next
435ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    //   if (iv.next < Upperbound)
436ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    //     goto LoopHeader
437ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    //   else
438ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    //     goto AfterBB
439ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    // AfterBB:
440e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.SetInsertPoint(HeaderBB);
441ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    IV = Builder.CreateLoad(IVVar, "X");
442e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
443ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    Builder.CreateStore(IVNext, IVVar);
444e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser    Cond = Builder.CreateICmpULT(IVNext, UpperBound);
445e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
446e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    AfterBB->setName("Exit");
447ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    Builder.SetInsertPoint(llvm::cast<llvm::Instruction>(IVNext));
448ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo
449ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    // Record information about this loop.
450e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    *LoopIV = IV;
451e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    return AfterBB;
452e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  }
453e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
45428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Finish building the outgoing argument list for calling a ForEach-able function.
45528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //
45628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // ArgVector - on input, the non-special arguments
45728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //             on output, the non-special arguments combined with the special arguments
45828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //               from SpecialArgVector
45928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // SpecialArgVector - special arguments (from ExpandSpecialArguments())
46028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // SpecialArgContextIdx - return value of ExpandSpecialArguments()
46128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //                          (position of context argument in SpecialArgVector)
46228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // CalleeFunction - the ForEach-able function being called
46328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Builder - for inserting code into the caller function
46428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  template<unsigned int ArgVectorLen, unsigned int SpecialArgVectorLen>
46528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  void finishArgList(      llvm::SmallVector<llvm::Value *, ArgVectorLen>        &ArgVector,
46628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const llvm::SmallVector<llvm::Value *, SpecialArgVectorLen> &SpecialArgVector,
46728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const int SpecialArgContextIdx,
46828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const llvm::Function &CalleeFunction,
46928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     llvm::IRBuilder<> &CallerBuilder) {
47028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    /* The context argument (if any) is a pointer to an opaque user-visible type that differs from
47128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * the RsExpandKernelDriverInfoPfx type used in the function we are generating (although the
47228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * two types represent the same thing).  Therefore, we must introduce a pointer cast when
47328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * generating a call to the kernel function.
47428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     */
47528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    const int ArgContextIdx =
47628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        SpecialArgContextIdx >= 0 ? (ArgVector.size() + SpecialArgContextIdx) : SpecialArgContextIdx;
47728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    ArgVector.append(SpecialArgVector.begin(), SpecialArgVector.end());
47828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    if (ArgContextIdx >= 0) {
47928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      llvm::Type *ContextArgType = nullptr;
48028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      int ArgIdx = ArgContextIdx;
48128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      for (const auto &Arg : CalleeFunction.getArgumentList()) {
48228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        if (!ArgIdx--) {
48328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross          ContextArgType = Arg.getType();
48428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross          break;
48528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        }
48628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      }
48728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      bccAssert(ContextArgType);
48828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      ArgVector[ArgContextIdx] = CallerBuilder.CreatePointerCast(ArgVector[ArgContextIdx], ContextArgType);
48928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    }
49028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  }
49128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
492083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // GEPHelper() returns a SmallVector of values suitable for passing
493083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // to IRBuilder::CreateGEP(), and SmallGEPIndices is a typedef for
494083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // the returned data type. It is sized so that the SmallVector
495083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // returned by GEPHelper() never needs to do a heap allocation for
496083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // any list of GEP indices it encounters in the code.
497083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  typedef llvm::SmallVector<llvm::Value *, 3> SmallGEPIndices;
498083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
499083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Helper for turning a list of constant integer GEP indices into a
500083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // SmallVector of llvm::Value*. The return value is suitable for
501083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // passing to a GetElementPtrInst constructor or IRBuilder::CreateGEP().
502083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //
503083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Inputs:
504083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   I32Args should be integers which represent the index arguments
505083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   to a GEP instruction.
506083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //
507083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Returns:
508083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   Returns a SmallVector of ConstantInts.
5094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  SmallGEPIndices GEPHelper(const std::initializer_list<int32_t> I32Args) {
510083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    SmallGEPIndices Out(I32Args.size());
511083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::IntegerType *I32Ty = llvm::Type::getInt32Ty(*Context);
512083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    std::transform(I32Args.begin(), I32Args.end(), Out.begin(),
513083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                   [I32Ty](int32_t Arg) { return llvm::ConstantInt::get(I32Ty, Arg); });
514083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    return Out;
515083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  }
516083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
5178ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosserpublic:
5184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  RSKernelExpandPass(bool pEnableStepOpt = true)
519900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes      : ModulePass(ID), Module(nullptr), Context(nullptr),
520bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes        mEnableStepOpt(pEnableStepOpt) {
521bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
5228ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  }
5238ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
524c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
525c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines    // This pass does not use any other analysis passes, but it does
526c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines    // add/wrap the existing functions in the module (thus altering the CFG).
527c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines  }
528c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines
52933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Build contribution to outgoing argument list for calling a
530e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // ForEach-able function or a general reduction accumulator
531e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // function, based on the special parameters of that function.
53233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  //
533e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Signature - metadata bits for the signature of the callee
53433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // X, Arg_p - values derived directly from expanded function,
535e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //            suitable for computing arguments for the callee
53633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // CalleeArgs - contribution is accumulated here
53733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Bump - invoked once for each contributed outgoing argument
538083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // LoopHeaderInsertionPoint - an Instruction in the loop header, before which
539083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //                            this function can insert loop-invariant loads
54028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //
54128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Return value is the (zero-based) position of the context (Arg_p)
54228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // argument in the CalleeArgs vector, or a negative value if the
54328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // context argument is not placed in the CalleeArgs vector.
54428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  int ExpandSpecialArguments(uint32_t Signature,
54528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::Value *X,
54628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::Value *Arg_p,
54728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::IRBuilder<> &Builder,
54828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::SmallVector<llvm::Value*, 8> &CalleeArgs,
549083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             std::function<void ()> Bump,
550083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             llvm::Instruction *LoopHeaderInsertionPoint) {
55128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
55228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    bccAssert(CalleeArgs.empty());
55328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
55428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    int Return = -1;
55533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) {
55633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      CalleeArgs.push_back(Arg_p);
55733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      Bump();
55828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      Return = CalleeArgs.size() - 1;
55933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
56033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
56133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
56233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      CalleeArgs.push_back(X);
56333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      Bump();
56433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
56533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
566e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) ||
567e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
568083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      bccAssert(LoopHeaderInsertionPoint);
56933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
570083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // Y and Z are loop invariant, so they can be hoisted out of the
571083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // loop. Set the IRBuilder insertion point to the loop header.
572083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      auto OldInsertionPoint = Builder.saveIP();
573083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.SetInsertPoint(LoopHeaderInsertionPoint);
574e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
575e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
576083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices YValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
577083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          RsLaunchDimensionsFieldY}));
578083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *YAddr = Builder.CreateInBoundsGEP(Arg_p, YValueGEP, "Y.gep");
579083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        CalleeArgs.push_back(Builder.CreateLoad(YAddr, "Y"));
580e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        Bump();
581e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      }
582e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
583e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
584083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices ZValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
585083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          RsLaunchDimensionsFieldZ}));
586083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *ZAddr = Builder.CreateInBoundsGEP(Arg_p, ZValueGEP, "Z.gep");
587083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        CalleeArgs.push_back(Builder.CreateLoad(ZAddr, "Z"));
588e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        Bump();
589e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      }
590083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
591083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.restoreIP(OldInsertionPoint);
59233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
59328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
59428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    return Return;
59533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  }
59633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
597e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Generate loop-invariant input processing setup code for an expanded
598e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // ForEach-able function or an expanded general reduction accumulator
599e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // function.
600e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
601e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // LoopHeader - block at the end of which the setup code will be inserted
602e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Arg_p - RSKernelDriverInfo pointer passed to the expanded function
603e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // TBAAPointer - metadata for marking loads of pointer values out of RSKernelDriverInfo
604e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // ArgIter - iterator pointing to first input of the UNexpanded function
605e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // NumInputs - number of inputs (NOT number of ARGUMENTS)
606e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
6077d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen  // InTypes[] - this function saves input type, they will be used in ExpandInputsBody().
6087d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen  // InBufPtrs[] - this function sets each array element to point to the first cell / byte
6097d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen  //               (byte for x86, cell for other platforms) of the corresponding input allocation
610e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // InStructTempSlots[] - this function sets each array element either to nullptr
611e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       or to the result of an alloca (for the case where the
612e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       calling convention dictates that a value must be passed
613e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       by reference, and so we need a stacked temporary to hold
614e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       a copy of that value)
615e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  void ExpandInputsLoopInvariant(llvm::IRBuilder<> &Builder, llvm::BasicBlock *LoopHeader,
616e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::Value *Arg_p,
617e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::MDNode *TBAAPointer,
618e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::Function::arg_iterator ArgIter,
619e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 const size_t NumInputs,
6207d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen                                 llvm::SmallVectorImpl<llvm::Type *> &InTypes,
621e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
622e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots) {
623e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT);
624e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
625e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Extract information about input slots. The work done
626e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // here is loop-invariant, so we can hoist the operations out of the loop.
627e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    auto OldInsertionPoint = Builder.saveIP();
628e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.SetInsertPoint(LoopHeader->getTerminator());
629e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
630e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    for (size_t InputIndex = 0; InputIndex < NumInputs; ++InputIndex, ArgIter++) {
631e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::Type *InType = ArgIter->getType();
632e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
633e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      /*
634e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * AArch64 calling conventions dictate that structs of sufficient size
635e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * get passed by pointer instead of passed by value.  This, combined
636e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * with the fact that we don't allow kernels to operate on pointer
637e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * data means that if we see a kernel with a pointer parameter we know
638e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * that it is a struct input that has been promoted.  As such we don't
639e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * need to convert its type to a pointer.  Later we will need to know
640e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * to create a temporary copy on the stack, so we save this information
641e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * in InStructTempSlots.
642e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       */
643e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) {
644e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        llvm::Type *ElementType = PtrType->getElementType();
645e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr,
646e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                         "input_struct_slot"));
647e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      } else {
648e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InType = InType->getPointerTo();
649e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InStructTempSlots.push_back(nullptr);
650e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
651e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
652e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      SmallGEPIndices InBufPtrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr,
653e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                             static_cast<int32_t>(InputIndex)}));
654e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::Value    *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep");
655e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf");
6567d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen
6577d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      llvm::Value *CastInBufPtr = nullptr;
6587d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) {
6597d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in");
6607d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      } else {
6617d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // The disagreement between module and x86 target machine datalayout
6627d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // causes mismatched input/output data offset between slang reflected
6637d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // code and bcc codegen for GetElementPtr. To solve this issue, skip the
6647d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // cast to InType and leave CastInBufPtr as an int8_t*.  The buffer is
6657d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // later indexed with an explicit byte offset computed based on
6667d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // X86_CUSTOM_DL_STRING and then bitcast it to actual input type.
6677d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        CastInBufPtr = InBufPtr;
6687d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      }
669e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
670e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (gEnableRsTbaa) {
671e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InBufPtr->setMetadata("tbaa", TBAAPointer);
672e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
673e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
6747d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      InTypes.push_back(InType);
675e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      InBufPtrs.push_back(CastInBufPtr);
676e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
677e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
678e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.restoreIP(OldInsertionPoint);
679e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
680e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
681e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Generate loop-varying input processing code for an expanded ForEach-able function
682e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // or an expanded general reduction accumulator function.  Also, for the call to the
683e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // UNexpanded function, collect the portion of the argument list corresponding to the
684e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // inputs.
685e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
686e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Arg_x1 - first X coordinate to be processed by the expanded function
687e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // TBAAAllocation - metadata for marking loads of input values out of allocations
688e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // NumInputs -- number of inputs (NOT number of ARGUMENTS)
6897d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen  // InTypes[] - this function uses the saved input types in ExpandInputsLoopInvariant()
6907d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen  //             to convert the pointer of byte InPtr to its real type.
691e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // InBufPtrs[] - this function consumes the information produced by ExpandInputsLoopInvariant()
692e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // InStructTempSlots[] - this function consumes the information produced by ExpandInputsLoopInvariant()
693e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // IndVar - value of loop induction variable (X coordinate) for a given loop iteration
694e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
695e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // RootArgs - this function sets this to the list of outgoing argument values corresponding
696e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //            to the inputs
697e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  void ExpandInputsBody(llvm::IRBuilder<> &Builder,
698e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::Value *Arg_x1,
699e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::MDNode *TBAAAllocation,
700e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        const size_t NumInputs,
7017d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen                        const llvm::SmallVectorImpl<llvm::Type *> &InTypes,
702e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        const llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
703e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        const llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots,
704e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::Value *IndVar,
705e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::SmallVectorImpl<llvm::Value *> &RootArgs) {
706e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Offset = Builder.CreateSub(IndVar, Arg_x1);
7077d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
708e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
709e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    for (size_t Index = 0; Index < NumInputs; ++Index) {
710e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
7117d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      llvm::Value *InPtr = nullptr;
7127d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) {
7137d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset);
7147d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      } else {
7157d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // Treat x86 input buffer as byte[], get indexed pointer with explicit
7167d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // byte offset computed using a datalayout based on
7177d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // X86_CUSTOM_DL_STRING, then bitcast it to actual input type.
7187d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        llvm::DataLayout DL(X86_CUSTOM_DL_STRING);
7197d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        llvm::Type *InTy = InTypes[Index];
7207d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        uint64_t InStep = DL.getTypeAllocSize(InTy->getPointerElementType());
7217d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        llvm::Value *OffsetInBytes = Builder.CreateMul(Offset, llvm::ConstantInt::get(Int32Ty, InStep));
7227d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], OffsetInBytes);
7237d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        InPtr = Builder.CreatePointerCast(InPtr, InTy);
7247d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      }
7257d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen
7267d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      llvm::Value *Input;
727e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
728e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
729e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (gEnableRsTbaa) {
730e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InputLoad->setMetadata("tbaa", TBAAAllocation);
731e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
732e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
733e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) {
734e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // Pass a pointer to a temporary on the stack, rather than
735e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // passing a pointer to the original value. We do not want
736e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // the kernel to potentially modify the input data.
737e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
738e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // Note: don't annotate with TBAA, since the kernel might
739e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // have its own TBAA annotations for the pointer argument.
740e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        Builder.CreateStore(InputLoad, TemporarySlot);
741e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        Input = TemporarySlot;
742e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      } else {
743e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        Input = InputLoad;
744e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
745e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
746e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      RootArgs.push_back(Input);
747e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
748e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
749e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
7508ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  /* Performs the actual optimization on a selected function. On success, the
7518ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   * Module will contain a new function of the name "<NAME>.expand" that
7528ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   * invokes <NAME>() in a loop with the appropriate parameters.
7538ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   */
7544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  bool ExpandOldStyleForEach(llvm::Function *Function, uint32_t Signature) {
755bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    ALOGV("Expanding ForEach-able Function %s",
756bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes          Function->getName().str().c_str());
7578ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
7588ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser    if (!Signature) {
759bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      Signature = getRootSignature(Function);
7608ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser      if (!Signature) {
7618ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        // We couldn't determine how to expand this function based on its
7628ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        // function signature.
7638ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        return false;
7648ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser      }
7658ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser    }
7668ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
767bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::DataLayout DL(Module);
7687d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen    if (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING) {
7697d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      DL.reset(X86_CUSTOM_DL_STRING);
7707d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen    }
7718ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
772bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
7734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      createEmptyExpandedForEachKernel(Function->getName());
774db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
775bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    /*
776bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * Extract the expanded function's parameters.  It is guaranteed by
777e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross     * createEmptyExpandedForEachKernel that there will be four parameters.
778bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     */
77933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
7804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
78133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
782bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator ExpandedFunctionArgIter =
783bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      ExpandedFunction->arg_begin();
784db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
785bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
786bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
787bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
7885010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
789bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
790900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *InStep  = nullptr;
791900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutStep = nullptr;
792db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
793db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Construct the actual function body.
7948e9089377848628813a697b972773e969b942c3bPirama Arumuga Nainar    llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin());
795db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
796cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Collect and construct the arguments for the kernel().
797db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Note that we load any loop-invariant arguments before entering the Loop.
798bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
799db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
800900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Type  *InTy      = nullptr;
801083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::Value *InBufPtr = nullptr;
802d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
803083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 0}));
804083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      llvm::LoadInst *InStepArg  = Builder.CreateLoad(
805083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"), "instep_addr");
806e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
807bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      InTy = (FunctionArgIter++)->getType();
808e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      InStep = getStepValue(&DL, InTy, InStepArg);
809e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
8102b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      InStep->setName("instep");
811e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
812083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices InputAddrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 0}));
813083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      InBufPtr = Builder.CreateLoad(
814083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        Builder.CreateInBoundsGEP(Arg_p, InputAddrGEP, "input_buf.gep"), "input_buf");
815db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
816db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
817900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Type *OutTy = nullptr;
818900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutBasePtr = nullptr;
819d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
820bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      OutTy = (FunctionArgIter++)->getType();
821b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
8222b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      OutStep->setName("outstep");
823083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
824083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
825db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
826db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
827900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *UsrData = nullptr;
828d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
829bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
830083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      llvm::Value *UsrDataPointerAddr = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldUsr);
831083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(UsrDataPointerAddr), UsrDataTy);
832db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      UsrData->setName("UsrData");
833db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
834db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
835083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
836ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    llvm::Value *IV;
83733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    createLoop(Builder, Arg_x1, Arg_x2, &IV);
838097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
83933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
84028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    const int CalleeArgsContextIdx = ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
841083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                                                            [&FunctionArgIter]() { FunctionArgIter++; },
842083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                                                            LoopHeader->getTerminator());
843db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
844bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(FunctionArgIter == Function->arg_end());
845db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
846cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Populate the actual call to kernel().
847db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::SmallVector<llvm::Value*, 8> RootArgs;
848db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
849900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *InPtr  = nullptr;
850900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutPtr = nullptr;
851db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
852ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    // Calculate the current input and output pointers
85302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    //
854ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    // We always calculate the input/output pointers with a GEP operating on i8
85502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // values and only cast at the very end to OutTy. This is because the step
85602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // between two values is given in bytes.
85702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    //
85802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // TODO: We could further optimize the output by using a GEP operation of
85902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // type 'OutTy' in cases where the element type of the allocation allows.
86002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    if (OutBasePtr) {
86102f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
86202f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      OutOffset = Builder.CreateMul(OutOffset, OutStep);
863083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutPtr = Builder.CreateInBoundsGEP(OutBasePtr, OutOffset);
86402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
86502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    }
866bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
867083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (InBufPtr) {
868ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
869ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      InOffset = Builder.CreateMul(InOffset, InStep);
870083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      InPtr = Builder.CreateInBoundsGEP(InBufPtr, InOffset);
871ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      InPtr = Builder.CreatePointerCast(InPtr, InTy);
872ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    }
87302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser
874ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    if (InPtr) {
8757ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      RootArgs.push_back(InPtr);
876db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
877db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
87802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    if (OutPtr) {
8797ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      RootArgs.push_back(OutPtr);
880db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
881db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
882db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (UsrData) {
883db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      RootArgs.push_back(UsrData);
884db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
885db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
88628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
887db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
888bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    Builder.CreateCall(Function, RootArgs);
889db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
8907ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    return true;
8917ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines  }
8927ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
8934e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  /* Expand a pass-by-value foreach kernel.
8947ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines   */
8954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  bool ExpandForEach(llvm::Function *Function, uint32_t Signature) {
896d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
897bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
8987ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
8994e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // TODO: Refactor this to share functionality with ExpandOldStyleForEach.
900bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::DataLayout DL(Module);
9017d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen    if (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING) {
9027d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      DL.reset(X86_CUSTOM_DL_STRING);
9037d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen    }
9047d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
9057ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
906bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
9074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      createEmptyExpandedForEachKernel(Function->getName());
9087ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
909bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    /*
910bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * Extract the expanded function's parameters.  It is guaranteed by
911e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross     * createEmptyExpandedForEachKernel that there will be four parameters.
912bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     */
913881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
9144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
915881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
916bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator ExpandedFunctionArgIter =
917bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      ExpandedFunction->arg_begin();
918bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
919bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
920bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
921bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
9223bc475b206c3fa249a212b90fe989fdcda4d75f9Matt Wala    // Arg_outstep is not used by expanded new-style forEach kernels.
9237ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
9247ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // Construct the actual function body.
9258e9089377848628813a697b972773e969b942c3bPirama Arumuga Nainar    llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin());
9267ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
92718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Create TBAA meta-data.
928354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
929354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines                 *TBAAAllocation, *TBAAPointer;
930bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDBuilder MDHelper(*Context);
93114588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien
932354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScriptDistinct =
9334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      MDHelper.createTBAARoot(kRenderScriptTBAARootName);
9344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
935354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines        TBAARenderScriptDistinct);
936e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
937e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                       TBAARenderScript);
938e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
939e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                      TBAAAllocation, 0);
940e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
941e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                    TBAARenderScript);
94214588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
94318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
944881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    /*
945881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     * Collect and construct the arguments for the kernel().
946881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     *
947881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     * Note that we load any loop-invariant arguments before entering the Loop.
948881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     */
949083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    size_t NumRemainingInputs = Function->arg_size();
9507ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
951881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // No usrData parameter on kernels.
952881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    bccAssert(
953881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
954881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
955881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    llvm::Function::arg_iterator ArgIter = Function->arg_begin();
956881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
957881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Check the return type
958bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Type     *OutTy            = nullptr;
959bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::LoadInst *OutBasePtr       = nullptr;
960bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Value    *CastedOutBasePtr = nullptr;
961881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
962e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    bool PassOutByPointer = false;
963881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
964d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
965bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *OutBaseTy = Function->getReturnType();
966881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
96774a4b08235990916911b8fe758d656c1171faf26Stephen Hines      if (OutBaseTy->isVoidTy()) {
968e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes        PassOutByPointer = true;
969881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        OutTy = ArgIter->getType();
970881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
971881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        ArgIter++;
972083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        --NumRemainingInputs;
97374a4b08235990916911b8fe758d656c1171faf26Stephen Hines      } else {
97474a4b08235990916911b8fe758d656c1171faf26Stephen Hines        // We don't increment Args, since we are using the actual return type.
975881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        OutTy = OutBaseTy->getPointerTo();
97674a4b08235990916911b8fe758d656c1171faf26Stephen Hines      }
977881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
978083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
979083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
980097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
9819c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      if (gEnableRsTbaa) {
9829c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines        OutBasePtr->setMetadata("tbaa", TBAAPointer);
9839c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
98450f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray
9857d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) {
9867d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
9877d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      } else {
9887d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // The disagreement between module and x86 target machine datalayout
9897d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // causes mismatched input/output data offset between slang reflected
9907d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // code and bcc codegen for GetElementPtr. To solve this issue, skip the
9917d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // cast to OutTy and leave CastedOutBasePtr as an int8_t*.  The buffer
9927d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // is later indexed with an explicit byte offset computed based on
9937d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // X86_CUSTOM_DL_STRING and then bitcast it to actual output type.
9947d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        CastedOutBasePtr = OutBasePtr;
9957d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      }
99674a4b08235990916911b8fe758d656c1171faf26Stephen Hines    }
99774a4b08235990916911b8fe758d656c1171faf26Stephen Hines
9987d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen    llvm::SmallVector<llvm::Type*,  8> InTypes;
999083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
1000d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala    llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
1001881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1002083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    bccAssert(NumRemainingInputs <= RS_KERNEL_INPUT_LIMIT);
1003881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1004083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // Create the loop structure.
1005083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
1006ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    llvm::Value *IV;
1007083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    createLoop(Builder, Arg_x1, Arg_x2, &IV);
1008881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1009083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
1010083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    const int CalleeArgsContextIdx =
1011083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
1012083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             [&NumRemainingInputs]() { --NumRemainingInputs; },
1013083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             LoopHeader->getTerminator());
1014083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
1015083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // After ExpandSpecialArguments() gets called, NumRemainingInputs
1016083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // counts the number of arguments to the kernel that correspond to
1017083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // an array entry from the InPtr field of the DriverInfo
1018083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // structure.
1019083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    const size_t NumInPtrArguments = NumRemainingInputs;
1020083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
1021083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (NumInPtrArguments > 0) {
1022e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, ArgIter, NumInPtrArguments,
10237d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen                                InTypes, InBufPtrs, InStructTempSlots);
1024881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    }
10257ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
10267ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // Populate the actual call to kernel().
10277ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    llvm::SmallVector<llvm::Value*, 8> RootArgs;
10287ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
10299296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala    // Calculate the current input and output pointers.
1030881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1031881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Output
1032881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1033900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutPtr = nullptr;
1034bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    if (CastedOutBasePtr) {
10357b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
10367d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen
10377d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) {
10387d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset);
10397d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      } else {
10407d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // Treat x86 output buffer as byte[], get indexed pointer with explicit
10417d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // byte offset computed using a datalayout based on
10427d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        // X86_CUSTOM_DL_STRING, then bitcast it to actual output type.
10437d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        uint64_t OutStep = DL.getTypeAllocSize(OutTy->getPointerElementType());
10447d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        llvm::Value *OutOffsetInBytes = Builder.CreateMul(OutOffset, llvm::ConstantInt::get(Int32Ty, OutStep));
10457d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffsetInBytes);
10467d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen        OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
10477d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen      }
1048bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
1049e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      if (PassOutByPointer) {
1050881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        RootArgs.push_back(OutPtr);
1051881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes      }
10524102bec56151fb5d9c962fb298412f34a6eacaa8Tobias Grosser    }
10537b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser
1054881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Inputs
105574a4b08235990916911b8fe758d656c1171faf26Stephen Hines
1056083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (NumInPtrArguments > 0) {
1057e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInPtrArguments,
10587d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen                       InTypes, InBufPtrs, InStructTempSlots, IV, RootArgs);
10597ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
10607ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
106128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
10627ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1063bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
10647ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1065e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    if (OutPtr && !PassOutByPointer) {
10669296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala      RetVal->setName("call.result");
106718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
10689c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      if (gEnableRsTbaa) {
10699c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines        Store->setMetadata("tbaa", TBAAAllocation);
10709c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
10717ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
10727ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1073db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return true;
1074db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1075db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1076e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Certain categories of functions that make up a general
1077e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // reduce-style kernel are called directly from the driver with no
1078e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // expansion needed.  For a function in such a category, we need to
1079e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // promote linkage from static to external, to ensure that the
1080e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // function is visible to the driver in the dynamic symbol table.
1081e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // This promotion is safe because we don't have any kind of cross
1082e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // translation unit linkage model (except for linking against
1083e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // RenderScript libraries), so we do not risk name clashes.
1084a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross  bool PromoteReduceFunction(const char *Name, FunctionSet &PromotedFunctions) {
1085e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    if (!Name)  // a presumably-optional function that is not present
1086e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      return false;
1087e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1088e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function *Fn = Module->getFunction(Name);
1089e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    bccAssert(Fn != nullptr);
1090e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    if (PromotedFunctions.insert(Fn).second) {
1091e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      bccAssert(Fn->getLinkage() == llvm::GlobalValue::InternalLinkage);
1092e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      Fn->setLinkage(llvm::GlobalValue::ExternalLinkage);
1093e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      return true;
1094e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
1095e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1096e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    return false;
1097e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
1098e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1099e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Expand the accumulator function for a general reduce-style kernel.
1100e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1101e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // The input is a function of the form
1102e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1103e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   define void @func(accumType* %accum, foo1 in1[, ... fooN inN] [, special arguments])
1104e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1105e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // where all arguments except the first are the same as for a foreach kernel.
1106e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1107e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // The input accumulator function gets expanded into a function of the form
1108e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1109e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   define void @func.expand(%RsExpandKernelDriverInfoPfx* %p, i32 %x1, i32 %x2, accumType* %accum)
1110e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1111e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // which performs a serial accumulaion of elements [x1, x2) into *%accum.
1112e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1113e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // In pseudocode, @func.expand does:
1114e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1115e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   for (i = %x1; i < %x2; ++i) {
1116e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //     func(%accum,
1117e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //          *((foo1 *)p->inPtr[0] + i)[, ... *((fooN *)p->inPtr[N-1] + i)
1118e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //          [, p] [, i] [, p->current.y] [, p->current.z]);
1119e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   }
1120e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1121e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // This is very similar to foreach kernel expansion with no output.
1122a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross  bool ExpandReduceAccumulator(llvm::Function *FnAccumulator, uint32_t Signature, size_t NumInputs) {
1123e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    ALOGV("Expanding accumulator %s for general reduce kernel",
1124e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross          FnAccumulator->getName().str().c_str());
1125e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1126e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Create TBAA meta-data.
1127e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
1128e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                 *TBAAAllocation, *TBAAPointer;
1129e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::MDBuilder MDHelper(*Context);
1130e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAARenderScriptDistinct =
1131e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      MDHelper.createTBAARoot(kRenderScriptTBAARootName);
1132e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
1133e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        TBAARenderScriptDistinct);
1134e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
1135e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                       TBAARenderScript);
1136e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
1137e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                      TBAAAllocation, 0);
1138e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
1139e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                    TBAARenderScript);
1140e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
1141e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1142e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    auto AccumulatorArgIter = FnAccumulator->arg_begin();
1143e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1144e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Create empty accumulator function.
1145e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function *FnExpandedAccumulator =
1146a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross        createEmptyExpandedReduceAccumulator(FnAccumulator->getName(),
1147a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross                                             (AccumulatorArgIter++)->getType());
1148e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1149e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Extract the expanded accumulator's parameters.  It is
1150a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross    // guaranteed by createEmptyExpandedReduceAccumulator that
1151e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // there will be 4 parameters.
1152a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross    bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams);
1153e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    auto ExpandedAccumulatorArgIter = FnExpandedAccumulator->arg_begin();
1154e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_p     = &*(ExpandedAccumulatorArgIter++);
1155e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_x1    = &*(ExpandedAccumulatorArgIter++);
1156e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_x2    = &*(ExpandedAccumulatorArgIter++);
1157e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_accum = &*(ExpandedAccumulatorArgIter++);
1158e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1159e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Construct the actual function body.
11608e9089377848628813a697b972773e969b942c3bPirama Arumuga Nainar    llvm::IRBuilder<> Builder(&*FnExpandedAccumulator->getEntryBlock().begin());
1161e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1162e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Create the loop structure.
1163e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
1164ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo    llvm::Value *IndVar;
1165e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    createLoop(Builder, Arg_x1, Arg_x2, &IndVar);
1166e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1167e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
1168e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    const int CalleeArgsContextIdx =
1169e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        ExpandSpecialArguments(Signature, IndVar, Arg_p, Builder, CalleeArgs,
1170e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                               [](){}, LoopHeader->getTerminator());
1171e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
11727d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen    llvm::SmallVector<llvm::Type*,  8> InTypes;
1173e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
1174e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
1175e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, AccumulatorArgIter, NumInputs,
11767d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen                              InTypes, InBufPtrs, InStructTempSlots);
1177e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1178e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Populate the actual call to the original accumulator.
1179e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> RootArgs;
1180e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    RootArgs.push_back(Arg_accum);
11817d6cde16bde7c6c65d47b51bb992bdfb94077393Yong Chen    ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInputs, InTypes, InBufPtrs, InStructTempSlots,
1182e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                     IndVar, RootArgs);
1183e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *FnAccumulator, Builder);
1184e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.CreateCall(FnAccumulator, RootArgs);
1185e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1186e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    return true;
1187e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
1188e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
118957fd9f882f3359be4201c42b02aebf785d311df2David Gross  // Create a combiner function for a general reduce-style kernel that lacks one,
119057fd9f882f3359be4201c42b02aebf785d311df2David Gross  // by calling the accumulator function.
119157fd9f882f3359be4201c42b02aebf785d311df2David Gross  //
119257fd9f882f3359be4201c42b02aebf785d311df2David Gross  // The accumulator function must be of the form
119357fd9f882f3359be4201c42b02aebf785d311df2David Gross  //
119457fd9f882f3359be4201c42b02aebf785d311df2David Gross  //   define void @accumFn(accumType* %accum, accumType %in)
119557fd9f882f3359be4201c42b02aebf785d311df2David Gross  //
119657fd9f882f3359be4201c42b02aebf785d311df2David Gross  // A combiner function will be generated of the form
119757fd9f882f3359be4201c42b02aebf785d311df2David Gross  //
119857fd9f882f3359be4201c42b02aebf785d311df2David Gross  //   define void @accumFn.combiner(accumType* %accum, accumType* %other) {
119957fd9f882f3359be4201c42b02aebf785d311df2David Gross  //     %1 = load accumType, accumType* %other
120057fd9f882f3359be4201c42b02aebf785d311df2David Gross  //     call void @accumFn(accumType* %accum, accumType %1);
120157fd9f882f3359be4201c42b02aebf785d311df2David Gross  //   }
1202a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross  bool CreateReduceCombinerFromAccumulator(llvm::Function *FnAccumulator) {
120357fd9f882f3359be4201c42b02aebf785d311df2David Gross    ALOGV("Creating combiner from accumulator %s for general reduce kernel",
120457fd9f882f3359be4201c42b02aebf785d311df2David Gross          FnAccumulator->getName().str().c_str());
120557fd9f882f3359be4201c42b02aebf785d311df2David Gross
120657fd9f882f3359be4201c42b02aebf785d311df2David Gross    using llvm::Attribute;
120757fd9f882f3359be4201c42b02aebf785d311df2David Gross
120857fd9f882f3359be4201c42b02aebf785d311df2David Gross    bccAssert(FnAccumulator->arg_size() == 2);
120957fd9f882f3359be4201c42b02aebf785d311df2David Gross    auto AccumulatorArgIter = FnAccumulator->arg_begin();
121057fd9f882f3359be4201c42b02aebf785d311df2David Gross    llvm::Value *AccumulatorArg_accum = &*(AccumulatorArgIter++);
121157fd9f882f3359be4201c42b02aebf785d311df2David Gross    llvm::Value *AccumulatorArg_in    = &*(AccumulatorArgIter++);
121257fd9f882f3359be4201c42b02aebf785d311df2David Gross    llvm::Type *AccumulatorArgType = AccumulatorArg_accum->getType();
121357fd9f882f3359be4201c42b02aebf785d311df2David Gross    bccAssert(AccumulatorArgType->isPointerTy());
121457fd9f882f3359be4201c42b02aebf785d311df2David Gross
121557fd9f882f3359be4201c42b02aebf785d311df2David Gross    llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
121657fd9f882f3359be4201c42b02aebf785d311df2David Gross    llvm::FunctionType *CombinerType =
121757fd9f882f3359be4201c42b02aebf785d311df2David Gross        llvm::FunctionType::get(VoidTy, { AccumulatorArgType, AccumulatorArgType }, false);
121857fd9f882f3359be4201c42b02aebf785d311df2David Gross    llvm::Function *FnCombiner =
121957fd9f882f3359be4201c42b02aebf785d311df2David Gross        llvm::Function::Create(CombinerType, llvm::GlobalValue::ExternalLinkage,
1220a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross                               nameReduceCombinerFromAccumulator(FnAccumulator->getName()),
122157fd9f882f3359be4201c42b02aebf785d311df2David Gross                               Module);
122257fd9f882f3359be4201c42b02aebf785d311df2David Gross
122357fd9f882f3359be4201c42b02aebf785d311df2David Gross    auto CombinerArgIter = FnCombiner->arg_begin();
122457fd9f882f3359be4201c42b02aebf785d311df2David Gross
122557fd9f882f3359be4201c42b02aebf785d311df2David Gross    llvm::Argument *CombinerArg_accum = &(*CombinerArgIter++);
122657fd9f882f3359be4201c42b02aebf785d311df2David Gross    CombinerArg_accum->setName("accum");
122757fd9f882f3359be4201c42b02aebf785d311df2David Gross    CombinerArg_accum->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_accum->getArgNo() + 1,
122857fd9f882f3359be4201c42b02aebf785d311df2David Gross                                                       llvm::makeArrayRef(Attribute::NoCapture)));
122957fd9f882f3359be4201c42b02aebf785d311df2David Gross
123057fd9f882f3359be4201c42b02aebf785d311df2David Gross    llvm::Argument *CombinerArg_other = &(*CombinerArgIter++);
123157fd9f882f3359be4201c42b02aebf785d311df2David Gross    CombinerArg_other->setName("other");
123257fd9f882f3359be4201c42b02aebf785d311df2David Gross    CombinerArg_other->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_other->getArgNo() + 1,
123357fd9f882f3359be4201c42b02aebf785d311df2David Gross                                                       llvm::makeArrayRef(Attribute::NoCapture)));
123457fd9f882f3359be4201c42b02aebf785d311df2David Gross
123557fd9f882f3359be4201c42b02aebf785d311df2David Gross    llvm::BasicBlock *BB = llvm::BasicBlock::Create(*Context, "BB", FnCombiner);
123657fd9f882f3359be4201c42b02aebf785d311df2David Gross    llvm::IRBuilder<> Builder(BB);
123757fd9f882f3359be4201c42b02aebf785d311df2David Gross
123857fd9f882f3359be4201c42b02aebf785d311df2David Gross    if (AccumulatorArg_in->getType()->isPointerTy()) {
123957fd9f882f3359be4201c42b02aebf785d311df2David Gross      // Types of sufficient size get passed by pointer-to-copy rather
124057fd9f882f3359be4201c42b02aebf785d311df2David Gross      // than passed by value.  An accumulator cannot take a pointer
124157fd9f882f3359be4201c42b02aebf785d311df2David Gross      // at the user level; so if we see a pointer here, we know that
124257fd9f882f3359be4201c42b02aebf785d311df2David Gross      // we have a pass-by-pointer-to-copy case.
124357fd9f882f3359be4201c42b02aebf785d311df2David Gross      llvm::Type *ElementType = AccumulatorArg_in->getType()->getPointerElementType();
124457fd9f882f3359be4201c42b02aebf785d311df2David Gross      llvm::Value *TempMem = Builder.CreateAlloca(ElementType, nullptr, "caller_copy");
124557fd9f882f3359be4201c42b02aebf785d311df2David Gross      Builder.CreateStore(Builder.CreateLoad(CombinerArg_other), TempMem);
124657fd9f882f3359be4201c42b02aebf785d311df2David Gross      Builder.CreateCall(FnAccumulator, { CombinerArg_accum, TempMem });
124757fd9f882f3359be4201c42b02aebf785d311df2David Gross    } else {
124857fd9f882f3359be4201c42b02aebf785d311df2David Gross      llvm::Value *TypeAdjustedOther = CombinerArg_other;
124957fd9f882f3359be4201c42b02aebf785d311df2David Gross      if (AccumulatorArgType->getPointerElementType() != AccumulatorArg_in->getType()) {
125057fd9f882f3359be4201c42b02aebf785d311df2David Gross        // Call lowering by frontend has done some type coercion
125157fd9f882f3359be4201c42b02aebf785d311df2David Gross        TypeAdjustedOther = Builder.CreatePointerCast(CombinerArg_other,
125257fd9f882f3359be4201c42b02aebf785d311df2David Gross                                                      AccumulatorArg_in->getType()->getPointerTo(),
125357fd9f882f3359be4201c42b02aebf785d311df2David Gross                                                      "cast");
125457fd9f882f3359be4201c42b02aebf785d311df2David Gross      }
125557fd9f882f3359be4201c42b02aebf785d311df2David Gross      llvm::Value *DerefOther = Builder.CreateLoad(TypeAdjustedOther);
125657fd9f882f3359be4201c42b02aebf785d311df2David Gross      Builder.CreateCall(FnAccumulator, { CombinerArg_accum, DerefOther });
125757fd9f882f3359be4201c42b02aebf785d311df2David Gross    }
125857fd9f882f3359be4201c42b02aebf785d311df2David Gross    Builder.CreateRetVoid();
125957fd9f882f3359be4201c42b02aebf785d311df2David Gross
126057fd9f882f3359be4201c42b02aebf785d311df2David Gross    return true;
126157fd9f882f3359be4201c42b02aebf785d311df2David Gross  }
126257fd9f882f3359be4201c42b02aebf785d311df2David Gross
126318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// @brief Checks if pointers to allocation internals are exposed
126418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  ///
126518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// This function verifies if through the parameters passed to the kernel
126618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// or through calls to the runtime library the script gains access to
126718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// pointers pointing to data within a RenderScript Allocation.
126818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// If we know we control all loads from and stores to data within
126918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// RenderScript allocations and if we know the run-time internal accesses
127018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// are all annotated with RenderScript TBAA metadata, only then we
127118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// can safely use TBAA to distinguish between generic and from-allocation
127218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// pointers.
1273bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  bool allocPointersExposed(llvm::Module &Module) {
127418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Old style kernel function can expose pointers to elements within
127518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // allocations.
127618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // TODO: Extend analysis to allow simple cases of old-style kernels.
127725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    for (size_t i = 0; i < mExportForEachCount; ++i) {
127825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      const char *Name = mExportForEachNameList[i];
127925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      uint32_t Signature = mExportForEachSignatureList[i];
1280bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (Module.getFunction(Name) &&
1281d88177580db4ddedf680854c51db333c97eabc59Stephen Hines          !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
128218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
128318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
128418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
128518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
128618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Check for library functions that expose a pointer to an Allocation or
128718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // that are not yet annotated with RenderScript-specific tbaa information.
1288e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    static const std::vector<const char *> Funcs{
1289e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAt(...)
1290e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationj",
1291e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationjj",
1292e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationjjj",
1293e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1294e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsSetElementAt()
1295e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvj",
1296e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvjj",
1297e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvjjj",
1298e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1299e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_Y()
1300e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj",
1301e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1302e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_U()
1303e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj",
1304e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1305e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_V()
1306e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj",
1307e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    };
1308e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1309e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    for (auto FI : Funcs) {
1310e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      llvm::Function *Function = Module.getFunction(FI);
131118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1312bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (!Function) {
1313e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala        ALOGE("Missing run-time function '%s'", FI);
131418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
131518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
131618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1317bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (Function->getNumUses() > 0) {
131818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
131918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
132018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
132118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
132218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    return false;
132318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  }
132418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
132518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
132618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  ///
132718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// The TBAA metadata used to annotate loads/stores from RenderScript
1328e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// Allocations is generated in a separate TBAA tree with a
1329354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for
1330354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// all nodes in unrelated alias analysis trees. This function makes the
1331354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root),
1332e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With
1333e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// the connected trees every access to an Allocation is resolved to
1334e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// must-alias if compared to a normal C/C++ access.
1335bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
1336bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDBuilder MDHelper(*Context);
1337354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScriptDistinct =
1338354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines      MDHelper.createTBAARoot("RenderScript Distinct TBAA");
1339354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode(
1340354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines        "RenderScript TBAA", TBAARenderScriptDistinct);
1341bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
1342354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScript->replaceOperandWith(1, TBAARoot);
134318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  }
134418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1345bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  virtual bool runOnModule(llvm::Module &Module) {
1346bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bool Changed  = false;
1347bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    this->Module  = &Module;
13484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    Context = &Module.getContext();
1349bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
13504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    buildTypes();
1351bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
1352bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bcinfo::MetadataExtractor me(&Module);
135325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    if (!me.extract()) {
135425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      ALOGE("Could not extract metadata from module!");
135525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      return false;
135625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    }
13574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
13584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Expand forEach_* style kernels.
135925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachCount = me.getExportForEachSignatureCount();
136025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachNameList = me.getExportForEachNameList();
136125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachSignatureList = me.getExportForEachSignatureList();
1362db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
136325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    for (size_t i = 0; i < mExportForEachCount; ++i) {
136425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      const char *name = mExportForEachNameList[i];
136525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      uint32_t signature = mExportForEachSignatureList[i];
1366bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Function *kernel = Module.getFunction(name);
1367cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser      if (kernel) {
1368d88177580db4ddedf680854c51db333c97eabc59Stephen Hines        if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
13694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala          Changed |= ExpandForEach(kernel, signature);
1370acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1371acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        } else if (kernel->getReturnType()->isVoidTy()) {
13724e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala          Changed |= ExpandOldStyleForEach(kernel, signature);
1373acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1374acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        } else {
1375acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // There are some graphics root functions that are not
1376acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // expanded, but that will be called directly. For those
1377acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // functions, we can not set the linkage to internal.
1378acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        }
1379cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines      }
1380db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
1381db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1382e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Process general reduce_* style functions.
1383a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross    const size_t ExportReduceCount = me.getExportReduceCount();
1384a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross    const bcinfo::MetadataExtractor::Reduce *ExportReduceList = me.getExportReduceList();
1385e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    //   Note that functions can be shared between kernels
138657fd9f882f3359be4201c42b02aebf785d311df2David Gross    FunctionSet PromotedFunctions, ExpandedAccumulators, AccumulatorsForCombiners;
1387e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1388a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross    for (size_t i = 0; i < ExportReduceCount; ++i) {
1389a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross      Changed |= PromoteReduceFunction(ExportReduceList[i].mInitializerName, PromotedFunctions);
1390a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross      Changed |= PromoteReduceFunction(ExportReduceList[i].mCombinerName, PromotedFunctions);
1391a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross      Changed |= PromoteReduceFunction(ExportReduceList[i].mOutConverterName, PromotedFunctions);
1392e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1393e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      // Accumulator
1394a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross      llvm::Function *accumulator = Module.getFunction(ExportReduceList[i].mAccumulatorName);
1395e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      bccAssert(accumulator != nullptr);
1396e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (ExpandedAccumulators.insert(accumulator).second)
1397a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross        Changed |= ExpandReduceAccumulator(accumulator,
1398a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross                                           ExportReduceList[i].mSignature,
1399a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross                                           ExportReduceList[i].mInputCount);
1400a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross      if (!ExportReduceList[i].mCombinerName) {
140157fd9f882f3359be4201c42b02aebf785d311df2David Gross        if (AccumulatorsForCombiners.insert(accumulator).second)
1402a48ea364652efcf947dd33c8a6ba893e9c00dd6aDavid Gross          Changed |= CreateReduceCombinerFromAccumulator(accumulator);
140357fd9f882f3359be4201c42b02aebf785d311df2David Gross      }
1404e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
1405e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
14064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (gEnableRsTbaa && !allocPointersExposed(Module)) {
1407bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      connectRenderScriptTBAAMetadata(Module);
140818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
140918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1410cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    return Changed;
1411db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1412db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1413db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  virtual const char *getPassName() const {
14144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    return "forEach_* and reduce_* function expansion";
1415db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1416db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
14174e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala}; // end RSKernelExpandPass
1418db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
14197a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace
14207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
14214e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walachar RSKernelExpandPass::ID = 0;
14224e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walastatic llvm::RegisterPass<RSKernelExpandPass> X("kernelexp", "Kernel Expand Pass");
1423db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1424db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace bcc {
1425db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1426ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leoconst char BCC_INDEX_VAR_NAME[] = "rsIndex";
1427ba1a8f1e6f3eb5b7069e9ba1575f16e393c84c23Dean De Leo
14287a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass *
14294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt WalacreateRSKernelExpandPass(bool pEnableStepOpt) {
14304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  return new RSKernelExpandPass(pEnableStepOpt);
14317a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao}
1432db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
14337a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc
1434