RSKernelExpand.cpp revision 9296edce10caec9c901f24b65e7d54c8ffe9131c
1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/*
2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project
3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License");
5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License.
6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at
7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *     http://www.apache.org/licenses/LICENSE-2.0
9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software
11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS,
12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and
14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License.
15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */
16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
176e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines#include "bcc/Assert.h"
18e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSTransforms.h"
197a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib>
2133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross#include <functional>
227a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
23b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DerivedTypes.h>
24b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Function.h>
25b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Instructions.h>
26b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/IRBuilder.h>
2718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser#include <llvm/IR/MDBuilder.h>
28b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Module.h>
29c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h>
307ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines#include <llvm/Support/raw_ostream.h>
31b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DataLayout.h>
32cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser#include <llvm/IR/Function.h>
33b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Type.h>
34806075b3a54af826fea78490fb213d8a0784138eTobias Grosser#include <llvm/Transforms/Utils/BasicBlockUtils.h>
35c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang
36c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include "bcc/Config/Config.h"
37ef73a242762bcd8113b9b65ceccbe7d909b5acbcZonr Chang#include "bcc/Support/Log.h"
38db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
39d88177580db4ddedf680854c51db333c97eabc59Stephen Hines#include "bcinfo/MetadataExtractor.h"
40d88177580db4ddedf680854c51db333c97eabc59Stephen Hines
415010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes#define NUM_EXPANDED_FUNCTION_PARAMS 4
42bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
437a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc;
447a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
45db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace {
467a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
47354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hinesstatic const bool gEnableRsTbaa = true;
489c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines
497a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao/* RSForEachExpandPass - This pass operates on functions that are able to be
507a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
517a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * ForEach-able function to be invoked over the appropriate data cells of the
527a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * input/output allocations (adjusting other relevant parameters as we go). We
537a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * support doing this for any ForEach-able compute kernels. The new function
547a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * name is the original function name followed by ".expand". Note that we
557a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * still generate code for the original function.
567a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */
577a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaoclass RSForEachExpandPass : public llvm::ModulePass {
5833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grosspublic:
59db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  static char ID;
60db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
6133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossprivate:
62e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h
63e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
64e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  enum RsLaunchDimensionsField {
65e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldX,
66e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldY,
67e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldZ,
68e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldLod,
69e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldFace,
70e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldArray,
71e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
72e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldCount
73e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  };
74e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
75e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  enum RsExpandKernelDriverInfoPfxField {
76e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldInPtr,
77e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldInStride,
78e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldInLen,
79e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldOutPtr,
80e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldOutStride,
81e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldOutLen,
82e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldDim,
83e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldCurrent,
84e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldUsr,
85e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldUsLenr,
86e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
87e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldCount
88e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  };
8933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
90bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::Module *Module;
91bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::LLVMContext *Context;
92bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
  /*
   * Pointer to LLVM type information for the function signature
   * for expanded kernels.  This must be re-calculated for each
   * module the pass is run on.
   */
98bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::FunctionType *ExpandedFunctionType;
99db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
10025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  uint32_t mExportForEachCount;
10125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  const char **mExportForEachNameList;
10225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  const uint32_t *mExportForEachSignatureList;
103cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines
1042b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // Turns on optimization of allocation stride values.
1052b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  bool mEnableStepOpt;
1062b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
107bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  uint32_t getRootSignature(llvm::Function *Function) {
108db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    const llvm::NamedMDNode *ExportForEachMetadata =
109bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes        Module->getNamedMetadata("#rs_export_foreach");
110db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
111db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (!ExportForEachMetadata) {
112db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
113bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      for (llvm::Function::arg_iterator B = Function->arg_begin(),
114bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                                        E = Function->arg_end();
115db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           B != E;
116db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           ++B) {
117db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        RootArgTys.push_back(B->getType());
118db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
119db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
120db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // For pre-ICS bitcode, we may not have signature information. In that
121db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // case, we use the size of the RootArgTys to select the number of
122db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // arguments.
123db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      return (1 << RootArgTys.size()) - 1;
124db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
125db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1267ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    if (ExportForEachMetadata->getNumOperands() == 0) {
1277ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      return 0;
1287ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
1297ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1306e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
131db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
132cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // We only handle the case for legacy root() functions here, so this is
133cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // hard-coded to look at only the first such function.
134db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
135900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    if (SigNode != nullptr && SigNode->getNumOperands() == 1) {
1361bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines      llvm::Metadata *SigMD = SigNode->getOperand(0);
1371bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines      if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) {
1381bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines        llvm::StringRef SigString = SigS->getString();
139db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        uint32_t Signature = 0;
140db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        if (SigString.getAsInteger(10, Signature)) {
141db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
142db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          return 0;
143db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        }
144db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        return Signature;
145db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
146db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
147db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
148db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return 0;
149db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
150db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
151429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray  bool isStepOptSupported(llvm::Type *AllocType) {
152429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
153429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
154429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
155429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
156429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (mEnableStepOpt) {
157429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
158429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
159429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
160429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType == VoidPtrTy) {
161429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
162429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
163429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
164429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (!PT) {
165429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
166429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
167429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
168429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // remaining conditions are 64-bit only
169429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (VoidPtrTy->getPrimitiveSizeInBits() == 32) {
170429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return true;
171429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
172429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
173429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // coerce suggests an upconverted struct type, which we can't support
174429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) {
175429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
176429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
177429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
178429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported
179429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2);
180429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128);
181429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType == V2xi64Ty || AllocType == Int128Ty) {
182429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
183429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
184429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
185429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    return true;
186429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray  }
187429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
  // Get the actual value we should use to step through an allocation.
  //
  // Normally the value we use to step through an allocation is given to us by
  // the driver. However, for certain primitive data types, we can derive an
  // integer constant for the step value. We use this integer constant whenever
  // possible to allow further compiler optimizations to take place.
  //
  // DL - Target Data size/layout information.
  // AllocType - Type of allocation (should be a pointer).
  // OrigStep - Original step increment (root.expand() input from driver).
198bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
1992b04086acbef6520ae2c54a868b1271abf053122Stephen Hines                            llvm::Value *OrigStep) {
200b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines    bccAssert(DL);
201bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(AllocType);
2022b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    bccAssert(OrigStep);
203bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
204429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (isStepOptSupported(AllocType)) {
2052b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      llvm::Type *ET = PT->getElementType();
206b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      uint64_t ETSize = DL->getTypeAllocSize(ET);
207bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
2082b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return llvm::ConstantInt::get(Int32Ty, ETSize);
2092b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    } else {
2102b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return OrigStep;
2112b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    }
2122b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  }
2132b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
214097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes  /// Builds the types required by the pass for the given context.
215bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  void buildTypes(void) {
216e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs.
217bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
218e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8Ty                   = llvm::Type::getInt8Ty(*Context);
219e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8PtrTy                = Int8Ty->getPointerTo();
220e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT);
221e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32Ty                  = llvm::Type::getInt32Ty(*Context);
222e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32ArrayInputLimitTy   = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT);
223e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *VoidPtrTy                = llvm::Type::getInt8PtrTy(*Context);
224e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32Array4Ty            = llvm::ArrayType::get(Int32Ty, 4);
225097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
226097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes    /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h:
227db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *
228e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * struct RsLaunchDimensions {
229e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t x;
230db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t y;
231db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t z;
232e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t lod;
233e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t face;
234e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t array[4];
235e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * };
236e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     */
237e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes;
238e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t x
239e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t y
240e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t z
241e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t lod
242e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t face
243e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4]
244e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::StructType *RsLaunchDimensionsTy =
245e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions");
246e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
2471d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross    /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h:
248e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
249e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * struct RsExpandKernelDriverInfoPfx {
250e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT];
251e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t inStride[RS_KERNEL_INPUT_LIMIT];
252e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t inLen;
253e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
254e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT];
255e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t outStride[RS_KERNEL_INPUT_LIMIT];
256e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t outLen;
257e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
258e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // Dimension of the launch
259e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     RsLaunchDimensions dim;
260e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
261e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // The walking iterator of the launch
262e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     RsLaunchDimensions current;
263e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
264e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     const void *usr;
265e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t usrLen;
266e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
267e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // Items below this line are not used by the compiler and can be change in the driver.
268e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // So the compiler must assume there are an unknown number of fields of unknown type
269e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // beginning here.
270db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     * };
2711d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross     *
2721d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross     * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp).
273db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     */
274e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes;
275e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]
276e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t inStride[RS_KERNEL_INPUT_LIMIT]
277e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t inLen
278e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]
279e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t outStride[RS_KERNEL_INPUT_LIMIT]
280e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t outLen
281e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions dim
282e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions current
283e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy);                // const void *usr
284e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t usrLen
285e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::StructType *RsExpandKernelDriverInfoPfxTy =
286e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx");
287bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
288bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    // Create the function type for expanded kernels.
289bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
290e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo();
291bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
292bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::SmallVector<llvm::Type*, 8> ParamTypes;
293e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    ParamTypes.push_back(RsExpandKernelDriverInfoPfxPtrTy); // const RsExpandKernelDriverInfoPfx *p
294e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    ParamTypes.push_back(Int32Ty);                          // uint32_t x1
295e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    ParamTypes.push_back(Int32Ty);                          // uint32_t x2
296e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    ParamTypes.push_back(Int32Ty);                          // uint32_t outstep
297bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
298e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    ExpandedFunctionType =
299e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes        llvm::FunctionType::get(llvm::Type::getVoidTy(*Context), ParamTypes,
300e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                false);
3018ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  }
3028ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
  /// @brief Create skeleton of the expanded function.
  ///
  /// This creates a function with the following signature:
  ///
  ///   void (const RsExpandKernelDriverInfoPfx *p, uint32_t x1, uint32_t x2,
  ///         uint32_t outstep)
  ///
310357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
311bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
312bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Function::Create(ExpandedFunctionType,
313bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                             llvm::GlobalValue::ExternalLinkage,
314bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                             OldName + ".expand", Module);
315bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
316bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
317bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
318bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
319bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
320bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("p");
321bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("x1");
322bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("x2");
323bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("arg_outstep");
324bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
325bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
326bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                                                       ExpandedFunction);
327806075b3a54af826fea78490fb213d8a0784138eTobias Grosser    llvm::IRBuilder<> Builder(Begin);
328806075b3a54af826fea78490fb213d8a0784138eTobias Grosser    Builder.CreateRetVoid();
329806075b3a54af826fea78490fb213d8a0784138eTobias Grosser
330bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    return ExpandedFunction;
331357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  }
332357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser
333e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @brief Create an empty loop
334e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
335e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// Create a loop of the form:
336e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
337e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// for (i = LowerBound; i < UpperBound; i++)
338e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///   ;
339e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
340e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// After the loop has been created, the builder is set such that
341e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// instructions can be added to the loop body.
342e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
343e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param Builder The builder to use to build this loop. The current
344e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///                position of the builder is the position the loop
345e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///                will be inserted.
346e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param LowerBound The first value of the loop iterator
347e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param UpperBound The maximal value of the loop iterator
348e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param LoopIV A reference that will be set to the loop iterator.
349e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @return The BasicBlock that will be executed after the loop.
350e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
351e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::Value *LowerBound,
352e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::Value *UpperBound,
353e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::PHINode **LoopIV) {
354c2ca742d7d0197c52e49467862844463fb42280fDavid Gross    bccAssert(LowerBound->getType() == UpperBound->getType());
355e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
356e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
357e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    llvm::Value *Cond, *IVNext;
358e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    llvm::PHINode *IV;
359e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
360e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    CondBB = Builder.GetInsertBlock();
3611bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines    AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), nullptr, nullptr);
362bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
363e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
364e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // if (LowerBound < Upperbound)
365e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto LoopHeader
366e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // else
367e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto AfterBB
368e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    CondBB->getTerminator()->eraseFromParent();
369e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.SetInsertPoint(CondBB);
370e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser    Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
371e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
372e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
373e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
374e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // iv.next = iv + 1
375e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // if (iv.next < Upperbound)
376e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto LoopHeader
377e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // else
378e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto AfterBB
379e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.SetInsertPoint(HeaderBB);
380e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
381e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IV->addIncoming(LowerBound, CondBB);
382e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
383e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IV->addIncoming(IVNext, HeaderBB);
384e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser    Cond = Builder.CreateICmpULT(IVNext, UpperBound);
385e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
386e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    AfterBB->setName("Exit");
387e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
388e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    *LoopIV = IV;
389e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    return AfterBB;
390e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  }
391e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
39228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Finish building the outgoing argument list for calling a ForEach-able function.
39328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //
39428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // ArgVector - on input, the non-special arguments
39528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //             on output, the non-special arguments combined with the special arguments
39628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //               from SpecialArgVector
39728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // SpecialArgVector - special arguments (from ExpandSpecialArguments())
39828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // SpecialArgContextIdx - return value of ExpandSpecialArguments()
39928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //                          (position of context argument in SpecialArgVector)
40028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // CalleeFunction - the ForEach-able function being called
40128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Builder - for inserting code into the caller function
40228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  template<unsigned int ArgVectorLen, unsigned int SpecialArgVectorLen>
40328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  void finishArgList(      llvm::SmallVector<llvm::Value *, ArgVectorLen>        &ArgVector,
40428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const llvm::SmallVector<llvm::Value *, SpecialArgVectorLen> &SpecialArgVector,
40528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const int SpecialArgContextIdx,
40628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const llvm::Function &CalleeFunction,
40728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     llvm::IRBuilder<> &CallerBuilder) {
40828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    /* The context argument (if any) is a pointer to an opaque user-visible type that differs from
40928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * the RsExpandKernelDriverInfoPfx type used in the function we are generating (although the
41028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * two types represent the same thing).  Therefore, we must introduce a pointer cast when
41128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * generating a call to the kernel function.
41228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     */
41328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    const int ArgContextIdx =
41428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        SpecialArgContextIdx >= 0 ? (ArgVector.size() + SpecialArgContextIdx) : SpecialArgContextIdx;
41528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    ArgVector.append(SpecialArgVector.begin(), SpecialArgVector.end());
41628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    if (ArgContextIdx >= 0) {
41728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      llvm::Type *ContextArgType = nullptr;
41828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      int ArgIdx = ArgContextIdx;
41928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      for (const auto &Arg : CalleeFunction.getArgumentList()) {
42028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        if (!ArgIdx--) {
42128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross          ContextArgType = Arg.getType();
42228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross          break;
42328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        }
42428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      }
42528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      bccAssert(ContextArgType);
42628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      ArgVector[ArgContextIdx] = CallerBuilder.CreatePointerCast(ArgVector[ArgContextIdx], ContextArgType);
42728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    }
42828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  }
42928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
430083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // GEPHelper() returns a SmallVector of values suitable for passing
431083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // to IRBuilder::CreateGEP(), and SmallGEPIndices is a typedef for
432083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // the returned data type. It is sized so that the SmallVector
433083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // returned by GEPHelper() never needs to do a heap allocation for
434083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // any list of GEP indices it encounters in the code.
435083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  typedef llvm::SmallVector<llvm::Value *, 3> SmallGEPIndices;
436083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
437083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Helper for turning a list of constant integer GEP indices into a
438083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // SmallVector of llvm::Value*. The return value is suitable for
439083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // passing to a GetElementPtrInst constructor or IRBuilder::CreateGEP().
440083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //
441083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Inputs:
442083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   I32Args should be integers which represent the index arguments
443083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   to a GEP instruction.
444083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //
445083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Returns:
446083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   Returns a SmallVector of ConstantInts.
447083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  SmallGEPIndices GEPHelper(std::initializer_list<int32_t> I32Args) {
448083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    SmallGEPIndices Out(I32Args.size());
449083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::IntegerType *I32Ty = llvm::Type::getInt32Ty(*Context);
450083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    std::transform(I32Args.begin(), I32Args.end(), Out.begin(),
451083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                   [I32Ty](int32_t Arg) { return llvm::ConstantInt::get(I32Ty, Arg); });
452083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    return Out;
453083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  }
454083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
4558ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosserpublic:
45633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  RSForEachExpandPass(bool pEnableStepOpt = true)
457900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes      : ModulePass(ID), Module(nullptr), Context(nullptr),
458bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes        mEnableStepOpt(pEnableStepOpt) {
459bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
4608ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  }
4618ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
462c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
463c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines    // This pass does not use any other analysis passes, but it does
464c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines    // add/wrap the existing functions in the module (thus altering the CFG).
465c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines  }
466c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines
46733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Build contribution to outgoing argument list for calling a
46833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // ForEach-able function, based on the special parameters of that
46933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // function.
47033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  //
47133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Signature - metadata bits for the signature of the ForEach-able function
47233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // X, Arg_p - values derived directly from expanded function,
47333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  //            suitable for computing arguments for the ForEach-able function
47433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // CalleeArgs - contribution is accumulated here
47533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Bump - invoked once for each contributed outgoing argument
476083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // LoopHeaderInsertionPoint - an Instruction in the loop header, before which
477083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //                            this function can insert loop-invariant loads
47828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //
47928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Return value is the (zero-based) position of the context (Arg_p)
48028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // argument in the CalleeArgs vector, or a negative value if the
48128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // context argument is not placed in the CalleeArgs vector.
48228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  int ExpandSpecialArguments(uint32_t Signature,
48328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::Value *X,
48428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::Value *Arg_p,
48528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::IRBuilder<> &Builder,
48628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::SmallVector<llvm::Value*, 8> &CalleeArgs,
487083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             std::function<void ()> Bump,
488083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             llvm::Instruction *LoopHeaderInsertionPoint) {
48928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
49028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    bccAssert(CalleeArgs.empty());
49128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
49228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    int Return = -1;
49333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) {
49433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      CalleeArgs.push_back(Arg_p);
49533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      Bump();
49628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      Return = CalleeArgs.size() - 1;
49733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
49833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
49933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
50033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      CalleeArgs.push_back(X);
50133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      Bump();
50233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
50333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
504e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) ||
505e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
506083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      bccAssert(LoopHeaderInsertionPoint);
50733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
508083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // Y and Z are loop invariant, so they can be hoisted out of the
509083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // loop. Set the IRBuilder insertion point to the loop header.
510083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      auto OldInsertionPoint = Builder.saveIP();
511083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.SetInsertPoint(LoopHeaderInsertionPoint);
512e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
513e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
514083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices YValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
515083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          RsLaunchDimensionsFieldY}));
516083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *YAddr = Builder.CreateInBoundsGEP(Arg_p, YValueGEP, "Y.gep");
517083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        CalleeArgs.push_back(Builder.CreateLoad(YAddr, "Y"));
518e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        Bump();
519e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      }
520e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
521e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
522083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices ZValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
523083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          RsLaunchDimensionsFieldZ}));
524083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *ZAddr = Builder.CreateInBoundsGEP(Arg_p, ZValueGEP, "Z.gep");
525083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        CalleeArgs.push_back(Builder.CreateLoad(ZAddr, "Z"));
526e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        Bump();
527e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      }
528083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
529083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.restoreIP(OldInsertionPoint);
53033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
53128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
53228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    return Return;
53333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  }
53433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
5358ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  /* Performs the actual optimization on a selected function. On success, the
5368ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   * Module will contain a new function of the name "<NAME>.expand" that
5378ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   * invokes <NAME>() in a loop with the appropriate parameters.
5388ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   */
539bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  bool ExpandFunction(llvm::Function *Function, uint32_t Signature) {
540bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    ALOGV("Expanding ForEach-able Function %s",
541bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes          Function->getName().str().c_str());
5428ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
5438ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser    if (!Signature) {
544bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      Signature = getRootSignature(Function);
5458ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser      if (!Signature) {
5468ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        // We couldn't determine how to expand this function based on its
5478ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        // function signature.
5488ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        return false;
5498ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser      }
5508ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser    }
5518ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
552bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::DataLayout DL(Module);
5538ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
554bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
555bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      createEmptyExpandedFunction(Function->getName());
556db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
557bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    /*
558bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * Extract the expanded function's parameters.  It is guaranteed by
559bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * createEmptyExpandedFunction that there will be five parameters.
560bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     */
56133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
56233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
56333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
564bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator ExpandedFunctionArgIter =
565bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      ExpandedFunction->arg_begin();
566db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
567bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
568bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
569bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
5705010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
571bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
572900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *InStep  = nullptr;
573900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutStep = nullptr;
574db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
575db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Construct the actual function body.
576bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
577db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
578cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Collect and construct the arguments for the kernel().
579db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Note that we load any loop-invariant arguments before entering the Loop.
580bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
581db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
582900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Type  *InTy      = nullptr;
583083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::Value *InBufPtr = nullptr;
584d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
585083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 0}));
586083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      llvm::LoadInst *InStepArg  = Builder.CreateLoad(
587083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"), "instep_addr");
588e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
589bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      InTy = (FunctionArgIter++)->getType();
590e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      InStep = getStepValue(&DL, InTy, InStepArg);
591e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
5922b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      InStep->setName("instep");
593e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
594083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices InputAddrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 0}));
595083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      InBufPtr = Builder.CreateLoad(
596083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        Builder.CreateInBoundsGEP(Arg_p, InputAddrGEP, "input_buf.gep"), "input_buf");
597db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
598db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
599900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Type *OutTy = nullptr;
600900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutBasePtr = nullptr;
601d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
602bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      OutTy = (FunctionArgIter++)->getType();
603b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
6042b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      OutStep->setName("outstep");
605083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
606083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
607db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
608db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
609900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *UsrData = nullptr;
610d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
611bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
612083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      llvm::Value *UsrDataPointerAddr = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldUsr);
613083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(UsrDataPointerAddr), UsrDataTy);
614db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      UsrData->setName("UsrData");
615db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
616db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
617083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
61833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    llvm::PHINode *IV;
61933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    createLoop(Builder, Arg_x1, Arg_x2, &IV);
620097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
62133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
62228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    const int CalleeArgsContextIdx = ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
623083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                                                            [&FunctionArgIter]() { FunctionArgIter++; },
624083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                                                            LoopHeader->getTerminator());
625db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
626bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(FunctionArgIter == Function->arg_end());
627db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
628cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Populate the actual call to kernel().
629db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::SmallVector<llvm::Value*, 8> RootArgs;
630db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
631900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *InPtr  = nullptr;
632900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutPtr = nullptr;
633db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
634ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    // Calculate the current input and output pointers
63502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    //
636ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    // We always calculate the input/output pointers with a GEP operating on i8
63702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // values and only cast at the very end to OutTy. This is because the step
63802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // between two values is given in bytes.
63902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    //
64002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // TODO: We could further optimize the output by using a GEP operation of
64102f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // type 'OutTy' in cases where the element type of the allocation allows.
64202f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    if (OutBasePtr) {
64302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
64402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      OutOffset = Builder.CreateMul(OutOffset, OutStep);
645083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutPtr = Builder.CreateInBoundsGEP(OutBasePtr, OutOffset);
64602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
64702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    }
648bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
649083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (InBufPtr) {
650ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
651ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      InOffset = Builder.CreateMul(InOffset, InStep);
652083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      InPtr = Builder.CreateInBoundsGEP(InBufPtr, InOffset);
653ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      InPtr = Builder.CreatePointerCast(InPtr, InTy);
654ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    }
65502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser
656ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    if (InPtr) {
6577ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      RootArgs.push_back(InPtr);
658db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
659db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
66002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    if (OutPtr) {
6617ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      RootArgs.push_back(OutPtr);
662db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
663db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
664db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (UsrData) {
665db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      RootArgs.push_back(UsrData);
666db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
667db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
66828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
669db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
670bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    Builder.CreateCall(Function, RootArgs);
671db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
6727ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    return true;
6737ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines  }
6747ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
6757ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines  /* Expand a pass-by-value kernel.
6767ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines   */
677bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  bool ExpandKernel(llvm::Function *Function, uint32_t Signature) {
678d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
679bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
6807ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
6817ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // TODO: Refactor this to share functionality with ExpandFunction.
682bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::DataLayout DL(Module);
6837ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
684bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
685bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      createEmptyExpandedFunction(Function->getName());
6867ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
687bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    /*
688bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * Extract the expanded function's parameters.  It is guaranteed by
689bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * createEmptyExpandedFunction that there will be five parameters.
690bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     */
691881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
692881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
693881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
694bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator ExpandedFunctionArgIter =
695bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      ExpandedFunction->arg_begin();
696bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
697bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
698bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
699bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
7005010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
7017ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
7027ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // Construct the actual function body.
703bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
7047ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
70518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Create TBAA meta-data.
706354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
707354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines                 *TBAAAllocation, *TBAAPointer;
708bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDBuilder MDHelper(*Context);
70914588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien
710354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScriptDistinct =
711354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines      MDHelper.createTBAARoot("RenderScript Distinct TBAA");
712354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScript = MDHelper.createTBAANode("RenderScript TBAA",
713354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines        TBAARenderScriptDistinct);
714e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
715e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                       TBAARenderScript);
716e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
717e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                      TBAAAllocation, 0);
718e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
719e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                    TBAARenderScript);
72014588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
72118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
722881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    /*
723881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     * Collect and construct the arguments for the kernel().
724881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     *
725881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     * Note that we load any loop-invariant arguments before entering the Loop.
726881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     */
727083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    size_t NumRemainingInputs = Function->arg_size();
7287ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
729881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // No usrData parameter on kernels.
730881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    bccAssert(
731881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
732881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
733881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    llvm::Function::arg_iterator ArgIter = Function->arg_begin();
734881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
735881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Check the return type
736bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Type     *OutTy            = nullptr;
737bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Value    *OutStep          = nullptr;
738bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::LoadInst *OutBasePtr       = nullptr;
739bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Value    *CastedOutBasePtr = nullptr;
740881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
741e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    bool PassOutByPointer = false;
742881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
743d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
744bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *OutBaseTy = Function->getReturnType();
745881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
74674a4b08235990916911b8fe758d656c1171faf26Stephen Hines      if (OutBaseTy->isVoidTy()) {
747e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes        PassOutByPointer = true;
748881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        OutTy = ArgIter->getType();
749881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
750881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        ArgIter++;
751083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        --NumRemainingInputs;
75274a4b08235990916911b8fe758d656c1171faf26Stephen Hines      } else {
75374a4b08235990916911b8fe758d656c1171faf26Stephen Hines        // We don't increment Args, since we are using the actual return type.
754881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        OutTy = OutBaseTy->getPointerTo();
75574a4b08235990916911b8fe758d656c1171faf26Stephen Hines      }
756881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
757b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
75874a4b08235990916911b8fe758d656c1171faf26Stephen Hines      OutStep->setName("outstep");
759083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
760083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
761097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
7629c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      if (gEnableRsTbaa) {
7639c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines        OutBasePtr->setMetadata("tbaa", TBAAPointer);
7649c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
76550f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray
766bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray      CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
76774a4b08235990916911b8fe758d656c1171faf26Stephen Hines    }
76874a4b08235990916911b8fe758d656c1171faf26Stephen Hines
769bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::SmallVector<llvm::Type*,  8> InTypes;
770bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::SmallVector<llvm::Value*, 8> InSteps;
771083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
772d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala    llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
773881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
774083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    bccAssert(NumRemainingInputs <= RS_KERNEL_INPUT_LIMIT);
775881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
776083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // Create the loop structure.
777083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
778083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::PHINode *IV;
779083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    createLoop(Builder, Arg_x1, Arg_x2, &IV);
780881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
781083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
782083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    const int CalleeArgsContextIdx =
783083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
784083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             [&NumRemainingInputs]() { --NumRemainingInputs; },
785083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             LoopHeader->getTerminator());
786083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
787083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // After ExpandSpecialArguments() gets called, NumRemainingInputs
788083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // counts the number of arguments to the kernel that correspond to
789083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // an array entry from the InPtr field of the DriverInfo
790083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // structure.
791083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    const size_t NumInPtrArguments = NumRemainingInputs;
792083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
793083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (NumInPtrArguments > 0) {
794083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // Extract information about input slots and step sizes. The work done
795083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // here is loop-invariant, so we can hoist the operations out of the loop.
796083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      auto OldInsertionPoint = Builder.saveIP();
797083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.SetInsertPoint(LoopHeader->getTerminator());
798083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
799083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      for (size_t InputIndex = 0; InputIndex < NumInPtrArguments; ++InputIndex, ArgIter++) {
800083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride,
801083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          static_cast<int32_t>(InputIndex)}));
802083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *InStepAddr = Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep");
803083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::LoadInst *InStepArg = Builder.CreateLoad(InStepAddr, "instep_addr");
804881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
805d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        llvm::Type *InType = ArgIter->getType();
806326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes
807326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes        /*
808d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala         * AArch64 calling conventions dictate that structs of sufficient size
809d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala         * get passed by pointer instead of passed by value.  This, combined
810d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala         * with the fact that we don't allow kernels to operate on pointer
811d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala         * data means that if we see a kernel with a pointer parameter we know
812083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala         * that it is a struct input that has been promoted.  As such we don't
813d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala         * need to convert its type to a pointer.  Later we will need to know
814d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala         * to create a temporary copy on the stack, so we save this information
815d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala         * in InStructTempSlots.
816326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes         */
817d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) {
818d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala          llvm::Type *ElementType = PtrType->getElementType();
819083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr,
820083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                                                           "input_struct_slot"));
821d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        } else {
822d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala          InType = InType->getPointerTo();
823d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala          InStructTempSlots.push_back(nullptr);
824d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        }
825326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes
826d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        llvm::Value *InStep = getStepValue(&DL, InType, InStepArg);
827881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
828d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        InStep->setName("instep");
829881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
830083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices InBufPtrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr,
831083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          static_cast<int32_t>(InputIndex)}));
832083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value    *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep");
833083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf");
834083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value    *CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in");
835d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        if (gEnableRsTbaa) {
836083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          InBufPtr->setMetadata("tbaa", TBAAPointer);
837d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        }
838881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
839d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        InTypes.push_back(InType);
840d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        InSteps.push_back(InStep);
841083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        InBufPtrs.push_back(CastInBufPtr);
842881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes      }
843083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
844083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.restoreIP(OldInsertionPoint);
845881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    }
8467ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
8477ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // Populate the actual call to kernel().
8487ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    llvm::SmallVector<llvm::Value*, 8> RootArgs;
8497ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
8509296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala    // Calculate the current input and output pointers.
851881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
852881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Output
853881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
854900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutPtr = nullptr;
855bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    if (CastedOutBasePtr) {
8567b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
857881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
858083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset);
859bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
860e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      if (PassOutByPointer) {
861881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        RootArgs.push_back(OutPtr);
862881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes      }
8634102bec56151fb5d9c962fb298412f34a6eacaa8Tobias Grosser    }
8647b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser
865881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Inputs
86674a4b08235990916911b8fe758d656c1171faf26Stephen Hines
867083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (NumInPtrArguments > 0) {
868881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes      llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1);
869881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
870083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      for (size_t Index = 0; Index < NumInPtrArguments; ++Index) {
871083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset);
872326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes        llvm::Value *Input;
873326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes
8749296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala        llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
8759296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala
8769296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala        if (gEnableRsTbaa) {
8779296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala          InputLoad->setMetadata("tbaa", TBAAAllocation);
8789296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala        }
8799296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala
880d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) {
881d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala          // Pass a pointer to a temporary on the stack, rather than
882d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala          // passing a pointer to the original value. We do not want
883d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala          // the kernel to potentially modify the input data.
884d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala
8859296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala          // Note: don't annotate with TBAA, since the kernel might
8869296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala          // have its own TBAA annotations for the pointer argument.
8879296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala          Builder.CreateStore(InputLoad, TemporarySlot);
888d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala          Input = TemporarySlot;
889326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes        } else {
890326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes          Input = InputLoad;
891881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        }
892881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
893881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        RootArgs.push_back(Input);
8949c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
8957ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
8967ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
89728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
8987ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
899bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
9007ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
901e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    if (OutPtr && !PassOutByPointer) {
9029296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala      RetVal->setName("call.result");
90318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
9049c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      if (gEnableRsTbaa) {
9059c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines        Store->setMetadata("tbaa", TBAAAllocation);
9069c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
9077ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
9087ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
909db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return true;
910db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
911db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
91218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// @brief Checks if pointers to allocation internals are exposed
91318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  ///
91418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// This function verifies if through the parameters passed to the kernel
91518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// or through calls to the runtime library the script gains access to
91618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// pointers pointing to data within a RenderScript Allocation.
91718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// If we know we control all loads from and stores to data within
91818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// RenderScript allocations and if we know the run-time internal accesses
91918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// are all annotated with RenderScript TBAA metadata, only then we
92018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// can safely use TBAA to distinguish between generic and from-allocation
92118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// pointers.
922bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  bool allocPointersExposed(llvm::Module &Module) {
92318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Old style kernel function can expose pointers to elements within
92418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // allocations.
92518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // TODO: Extend analysis to allow simple cases of old-style kernels.
92625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    for (size_t i = 0; i < mExportForEachCount; ++i) {
92725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      const char *Name = mExportForEachNameList[i];
92825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      uint32_t Signature = mExportForEachSignatureList[i];
929bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (Module.getFunction(Name) &&
930d88177580db4ddedf680854c51db333c97eabc59Stephen Hines          !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
93118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
93218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
93318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
93418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
93518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Check for library functions that expose a pointer to an Allocation or
93618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // that are not yet annotated with RenderScript-specific tbaa information.
937e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    static const std::vector<const char *> Funcs{
938e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAt(...)
939e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationj",
940e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationjj",
941e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationjjj",
942e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
943e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsSetElementAt()
944e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvj",
945e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvjj",
946e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvjjj",
947e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
948e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_Y()
949e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj",
950e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
951e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_U()
952e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj",
953e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
954e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_V()
955e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj",
956e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    };
957e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
958e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    for (auto FI : Funcs) {
959e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      llvm::Function *Function = Module.getFunction(FI);
96018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
961bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (!Function) {
962e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala        ALOGE("Missing run-time function '%s'", FI);
96318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
96418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
96518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
966bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (Function->getNumUses() > 0) {
96718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
96818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
96918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
97018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
97118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    return false;
97218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  }
97318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
97418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
97518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  ///
97618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// The TBAA metadata used to annotate loads/stores from RenderScript
977e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// Allocations is generated in a separate TBAA tree with a
978354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for
979354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// all nodes in unrelated alias analysis trees. This function makes the
980354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root),
981e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With
982e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// the connected trees every access to an Allocation is resolved to
983e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// must-alias if compared to a normal C/C++ access.
984bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
985bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDBuilder MDHelper(*Context);
986354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScriptDistinct =
987354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines      MDHelper.createTBAARoot("RenderScript Distinct TBAA");
988354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode(
989354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines        "RenderScript TBAA", TBAARenderScriptDistinct);
990bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
991354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScript->replaceOperandWith(1, TBAARoot);
99218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  }
99318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
994bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  virtual bool runOnModule(llvm::Module &Module) {
995bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bool Changed  = false;
996bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    this->Module  = &Module;
997bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    this->Context = &Module.getContext();
998bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
999bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    this->buildTypes();
1000bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
1001bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bcinfo::MetadataExtractor me(&Module);
100225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    if (!me.extract()) {
100325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      ALOGE("Could not extract metadata from module!");
100425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      return false;
100525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    }
100625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachCount = me.getExportForEachSignatureCount();
100725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachNameList = me.getExportForEachNameList();
100825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachSignatureList = me.getExportForEachSignatureList();
1009db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1010bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bool AllocsExposed = allocPointersExposed(Module);
101118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
101225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    for (size_t i = 0; i < mExportForEachCount; ++i) {
101325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      const char *name = mExportForEachNameList[i];
101425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      uint32_t signature = mExportForEachSignatureList[i];
1015bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Function *kernel = Module.getFunction(name);
1016cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser      if (kernel) {
1017d88177580db4ddedf680854c51db333c97eabc59Stephen Hines        if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
1018cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser          Changed |= ExpandKernel(kernel, signature);
1019acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1020acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        } else if (kernel->getReturnType()->isVoidTy()) {
1021cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser          Changed |= ExpandFunction(kernel, signature);
1022acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1023acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        } else {
1024acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // There are some graphics root functions that are not
1025acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // expanded, but that will be called directly. For those
1026acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // functions, we can not set the linkage to internal.
1027acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        }
1028cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines      }
1029db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
1030db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
10319c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines    if (gEnableRsTbaa && !AllocsExposed) {
1032bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      connectRenderScriptTBAAMetadata(Module);
103318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
103418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1035cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    return Changed;
1036db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1037db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1038db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  virtual const char *getPassName() const {
1039db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return "ForEach-able Function Expansion";
1040db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1041db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
10427a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao}; // end RSForEachExpandPass
1043db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
10447a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace
10457a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
10467a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaochar RSForEachExpandPass::ID = 0;
104733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossstatic llvm::RegisterPass<RSForEachExpandPass> X("foreachexp", "ForEach Expand Pass");
1048db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1049db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace bcc {
1050db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
10517a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass *
105225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen HinescreateRSForEachExpandPass(bool pEnableStepOpt){
105325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  return new RSForEachExpandPass(pEnableStepOpt);
10547a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao}
1055db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
10567a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc
1057