RSKernelExpand.cpp revision c2ca742d7d0197c52e49467862844463fb42280f
1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/*
2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project
3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License");
5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License.
6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at
7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *     http://www.apache.org/licenses/LICENSE-2.0
9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software
11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS,
12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and
14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License.
15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */
16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
176e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines#include "bcc/Assert.h"
18e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSTransforms.h"
197a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib>
2133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross#include <functional>
227a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
23b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DerivedTypes.h>
24b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Function.h>
25b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Instructions.h>
26b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/IRBuilder.h>
2718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser#include <llvm/IR/MDBuilder.h>
28b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Module.h>
29c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h>
307ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines#include <llvm/Support/raw_ostream.h>
31b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DataLayout.h>
32cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser#include <llvm/IR/Function.h>
33b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Type.h>
34806075b3a54af826fea78490fb213d8a0784138eTobias Grosser#include <llvm/Transforms/Utils/BasicBlockUtils.h>
35c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang
36c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include "bcc/Config/Config.h"
37ef73a242762bcd8113b9b65ceccbe7d909b5acbcZonr Chang#include "bcc/Support/Log.h"
38db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
39d88177580db4ddedf680854c51db333c97eabc59Stephen Hines#include "bcinfo/MetadataExtractor.h"
40d88177580db4ddedf680854c51db333c97eabc59Stephen Hines
415010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes#define NUM_EXPANDED_FUNCTION_PARAMS 4
42bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
437a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc;
447a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
45db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace {
467a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
47354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hinesstatic const bool gEnableRsTbaa = true;
489c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines
497a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao/* RSForEachExpandPass - This pass operates on functions that are able to be
507a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
517a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * ForEach-able function to be invoked over the appropriate data cells of the
527a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * input/output allocations (adjusting other relevant parameters as we go). We
537a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * support doing this for any ForEach-able compute kernels. The new function
547a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * name is the original function name followed by ".expand". Note that we
557a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao * still generate code for the original function.
567a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */
577a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaoclass RSForEachExpandPass : public llvm::ModulePass {
5833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grosspublic:
59db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  static char ID;
60db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
6133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossprivate:
62e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h
63e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
64e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  enum RsLaunchDimensionsField {
65e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldX,
66e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldY,
67e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldZ,
68e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldLod,
69e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldFace,
70e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldArray,
71e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
72e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldCount
73e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  };
74e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
75e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  enum RsExpandKernelDriverInfoPfxField {
76e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldInPtr,
77e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldInStride,
78e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldInLen,
79e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldOutPtr,
80e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldOutStride,
81e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldOutLen,
82e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldDim,
83e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldCurrent,
84e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldUsr,
85e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldUsLenr,
86e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
87e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldCount
88e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  };
8933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
90bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::Module *Module;
91bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::LLVMContext *Context;
92bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
  /*
   * Pointer to LLVM type information for the function signature
   * for expanded kernels.  This must be re-calculated for each
   * module the pass is run on.
   */
98bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::FunctionType *ExpandedFunctionType;
99db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
10025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  uint32_t mExportForEachCount;
10125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  const char **mExportForEachNameList;
10225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  const uint32_t *mExportForEachSignatureList;
103cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines
1042b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // Turns on optimization of allocation stride values.
1052b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  bool mEnableStepOpt;
1062b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
107bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  uint32_t getRootSignature(llvm::Function *Function) {
108db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    const llvm::NamedMDNode *ExportForEachMetadata =
109bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes        Module->getNamedMetadata("#rs_export_foreach");
110db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
111db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (!ExportForEachMetadata) {
112db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
113bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      for (llvm::Function::arg_iterator B = Function->arg_begin(),
114bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                                        E = Function->arg_end();
115db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           B != E;
116db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           ++B) {
117db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        RootArgTys.push_back(B->getType());
118db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
119db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
120db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // For pre-ICS bitcode, we may not have signature information. In that
121db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // case, we use the size of the RootArgTys to select the number of
122db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // arguments.
123db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      return (1 << RootArgTys.size()) - 1;
124db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
125db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1267ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    if (ExportForEachMetadata->getNumOperands() == 0) {
1277ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      return 0;
1287ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
1297ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1306e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
131db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
132cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // We only handle the case for legacy root() functions here, so this is
133cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // hard-coded to look at only the first such function.
134db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
135900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    if (SigNode != nullptr && SigNode->getNumOperands() == 1) {
1361bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines      llvm::Metadata *SigMD = SigNode->getOperand(0);
1371bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines      if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) {
1381bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines        llvm::StringRef SigString = SigS->getString();
139db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        uint32_t Signature = 0;
140db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        if (SigString.getAsInteger(10, Signature)) {
141db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
142db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          return 0;
143db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        }
144db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        return Signature;
145db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
146db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
147db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
148db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return 0;
149db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
150db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
151429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray  bool isStepOptSupported(llvm::Type *AllocType) {
152429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
153429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
154429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
155429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
156429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (mEnableStepOpt) {
157429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
158429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
159429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
160429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType == VoidPtrTy) {
161429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
162429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
163429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
164429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (!PT) {
165429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
166429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
167429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
168429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // remaining conditions are 64-bit only
169429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (VoidPtrTy->getPrimitiveSizeInBits() == 32) {
170429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return true;
171429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
172429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
173429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // coerce suggests an upconverted struct type, which we can't support
174429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) {
175429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
176429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
177429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
178429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported
179429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2);
180429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128);
181429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType == V2xi64Ty || AllocType == Int128Ty) {
182429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
183429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
184429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
185429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    return true;
186429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray  }
187429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
  // Get the actual value we should use to step through an allocation.
  //
  // Normally the value we use to step through an allocation is given to us by
  // the driver. However, for certain primitive data types, we can derive an
  // integer constant for the step value. We use this integer constant whenever
  // possible to allow further compiler optimizations to take place.
  //
  // DL - Target Data size/layout information.
  // AllocType - Type of allocation (should be a pointer).
  // OrigStep - Original step increment (root.expand() input from driver).
198bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
1992b04086acbef6520ae2c54a868b1271abf053122Stephen Hines                            llvm::Value *OrigStep) {
200b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines    bccAssert(DL);
201bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(AllocType);
2022b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    bccAssert(OrigStep);
203bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
204429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (isStepOptSupported(AllocType)) {
2052b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      llvm::Type *ET = PT->getElementType();
206b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      uint64_t ETSize = DL->getTypeAllocSize(ET);
207bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
2082b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return llvm::ConstantInt::get(Int32Ty, ETSize);
2092b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    } else {
2102b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return OrigStep;
2112b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    }
2122b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  }
2132b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
214097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes  /// Builds the types required by the pass for the given context.
215bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  void buildTypes(void) {
216e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs.
217bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
218e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8Ty                   = llvm::Type::getInt8Ty(*Context);
219e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8PtrTy                = Int8Ty->getPointerTo();
220e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT);
221e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32Ty                  = llvm::Type::getInt32Ty(*Context);
222e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32ArrayInputLimitTy   = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT);
223e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *VoidPtrTy                = llvm::Type::getInt8PtrTy(*Context);
224e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32Array4Ty            = llvm::ArrayType::get(Int32Ty, 4);
225097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
226097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes    /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h:
227db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *
228e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * struct RsLaunchDimensions {
229e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t x;
230db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t y;
231db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t z;
232e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t lod;
233e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t face;
234e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t array[4];
235e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * };
236e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     */
237e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes;
238e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t x
239e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t y
240e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t z
241e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t lod
242e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t face
243e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4]
244e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::StructType *RsLaunchDimensionsTy =
245e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions");
246e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
2471d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross    /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h:
248e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
249e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * struct RsExpandKernelDriverInfoPfx {
250e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT];
251e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t inStride[RS_KERNEL_INPUT_LIMIT];
252e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t inLen;
253e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
254e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT];
255e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t outStride[RS_KERNEL_INPUT_LIMIT];
256e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t outLen;
257e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
258e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // Dimension of the launch
259e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     RsLaunchDimensions dim;
260e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
261e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // The walking iterator of the launch
262e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     RsLaunchDimensions current;
263e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
264e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     const void *usr;
265e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t usrLen;
266e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
267e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // Items below this line are not used by the compiler and can be change in the driver.
268e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // So the compiler must assume there are an unknown number of fields of unknown type
269e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // beginning here.
270db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     * };
2711d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross     *
2721d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross     * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp).
273db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     */
274e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes;
275e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]
276e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t inStride[RS_KERNEL_INPUT_LIMIT]
277e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t inLen
278e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]
279e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t outStride[RS_KERNEL_INPUT_LIMIT]
280e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t outLen
281e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions dim
282e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions current
283e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy);                // const void *usr
284e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t usrLen
285e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::StructType *RsExpandKernelDriverInfoPfxTy =
286e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx");
287bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
288bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    // Create the function type for expanded kernels.
289bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
290e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo();
291bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
292bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::SmallVector<llvm::Type*, 8> ParamTypes;
293e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    ParamTypes.push_back(RsExpandKernelDriverInfoPfxPtrTy); // const RsExpandKernelDriverInfoPfx *p
294e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    ParamTypes.push_back(Int32Ty);                          // uint32_t x1
295e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    ParamTypes.push_back(Int32Ty);                          // uint32_t x2
296e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    ParamTypes.push_back(Int32Ty);                          // uint32_t outstep
297bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
298e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    ExpandedFunctionType =
299e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes        llvm::FunctionType::get(llvm::Type::getVoidTy(*Context), ParamTypes,
300e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                false);
3018ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  }
3028ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
  /// @brief Create skeleton of the expanded function.
  ///
  /// This creates a function with the following signature:
  ///
  ///   void (const RsExpandKernelDriverInfoPfx *p, uint32_t x1, uint32_t x2,
  ///         uint32_t outstep)
  ///
310357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
311bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
312bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Function::Create(ExpandedFunctionType,
313bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                             llvm::GlobalValue::ExternalLinkage,
314bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                             OldName + ".expand", Module);
315bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
316bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
317bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
318bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
319bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
320bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("p");
321bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("x1");
322bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("x2");
323bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("arg_outstep");
324bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
325bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
326bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                                                       ExpandedFunction);
327806075b3a54af826fea78490fb213d8a0784138eTobias Grosser    llvm::IRBuilder<> Builder(Begin);
328806075b3a54af826fea78490fb213d8a0784138eTobias Grosser    Builder.CreateRetVoid();
329806075b3a54af826fea78490fb213d8a0784138eTobias Grosser
330bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    return ExpandedFunction;
331357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  }
332357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser
333e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @brief Create an empty loop
334e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
335e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// Create a loop of the form:
336e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
337e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// for (i = LowerBound; i < UpperBound; i++)
338e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///   ;
339e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
340e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// After the loop has been created, the builder is set such that
341e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// instructions can be added to the loop body.
342e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
343e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param Builder The builder to use to build this loop. The current
344e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///                position of the builder is the position the loop
345e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///                will be inserted.
346e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param LowerBound The first value of the loop iterator
347e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param UpperBound The maximal value of the loop iterator
348e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param LoopIV A reference that will be set to the loop iterator.
349e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @return The BasicBlock that will be executed after the loop.
350e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
351e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::Value *LowerBound,
352e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::Value *UpperBound,
353e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::PHINode **LoopIV) {
354c2ca742d7d0197c52e49467862844463fb42280fDavid Gross    bccAssert(LowerBound->getType() == UpperBound->getType());
355e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
356e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
357e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    llvm::Value *Cond, *IVNext;
358e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    llvm::PHINode *IV;
359e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
360e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    CondBB = Builder.GetInsertBlock();
3611bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines    AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), nullptr, nullptr);
362bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
363e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
364e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // if (LowerBound < Upperbound)
365e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto LoopHeader
366e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // else
367e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto AfterBB
368e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    CondBB->getTerminator()->eraseFromParent();
369e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.SetInsertPoint(CondBB);
370e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser    Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
371e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
372e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
373e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
374e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // iv.next = iv + 1
375e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // if (iv.next < Upperbound)
376e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto LoopHeader
377e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // else
378e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto AfterBB
379e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.SetInsertPoint(HeaderBB);
380e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
381e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IV->addIncoming(LowerBound, CondBB);
382e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
383e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IV->addIncoming(IVNext, HeaderBB);
384e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser    Cond = Builder.CreateICmpULT(IVNext, UpperBound);
385e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
386e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    AfterBB->setName("Exit");
387e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
388e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    *LoopIV = IV;
389e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    return AfterBB;
390e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  }
391e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
3928ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosserpublic:
39333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  RSForEachExpandPass(bool pEnableStepOpt = true)
394900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes      : ModulePass(ID), Module(nullptr), Context(nullptr),
395bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes        mEnableStepOpt(pEnableStepOpt) {
396bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
3978ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  }
3988ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
399c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
400c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines    // This pass does not use any other analysis passes, but it does
401c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines    // add/wrap the existing functions in the module (thus altering the CFG).
402c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines  }
403c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines
40433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Build contribution to outgoing argument list for calling a
40533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // ForEach-able function, based on the special parameters of that
40633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // function.
40733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  //
40833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Signature - metadata bits for the signature of the ForEach-able function
40933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // X, Arg_p - values derived directly from expanded function,
41033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  //            suitable for computing arguments for the ForEach-able function
41133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // CalleeArgs - contribution is accumulated here
41233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Bump - invoked once for each contributed outgoing argument
41333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  void ExpandSpecialArguments(uint32_t Signature,
41433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross                              llvm::Value *X,
41533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross                              llvm::Value *Arg_p,
41633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross                              llvm::IRBuilder<> &Builder,
41733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross                              llvm::SmallVector<llvm::Value*, 8> &CalleeArgs,
41833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross                              std::function<void ()> Bump) {
41933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
42033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) {
42133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      CalleeArgs.push_back(Arg_p);
42233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      Bump();
42333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
42433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
42533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
42633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      CalleeArgs.push_back(X);
42733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      Bump();
42833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
42933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
430e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) ||
431e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
43233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
43398137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar      llvm::Value *Current = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldCurrent);
434e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
435e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
436e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::Value *Y = Builder.CreateLoad(
43798137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar            Builder.CreateStructGEP(nullptr, Current, RsLaunchDimensionsFieldY), "Y");
43898137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar
439e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        CalleeArgs.push_back(Y);
440e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        Bump();
441e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      }
442e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
443e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
444e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::Value *Z = Builder.CreateLoad(
44598137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar            Builder.CreateStructGEP(nullptr, Current, RsLaunchDimensionsFieldZ), "Z");
446e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        CalleeArgs.push_back(Z);
447e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        Bump();
448e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      }
44933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
45033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  }
45133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
4528ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  /* Performs the actual optimization on a selected function. On success, the
4538ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   * Module will contain a new function of the name "<NAME>.expand" that
4548ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   * invokes <NAME>() in a loop with the appropriate parameters.
4558ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   */
456bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  bool ExpandFunction(llvm::Function *Function, uint32_t Signature) {
457bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    ALOGV("Expanding ForEach-able Function %s",
458bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes          Function->getName().str().c_str());
4598ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
4608ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser    if (!Signature) {
461bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      Signature = getRootSignature(Function);
4628ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser      if (!Signature) {
4638ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        // We couldn't determine how to expand this function based on its
4648ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        // function signature.
4658ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        return false;
4668ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser      }
4678ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser    }
4688ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
469bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::DataLayout DL(Module);
4708ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
471bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
472bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      createEmptyExpandedFunction(Function->getName());
473db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
474bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    /*
475bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * Extract the expanded function's parameters.  It is guaranteed by
476bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * createEmptyExpandedFunction that there will be five parameters.
477bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     */
47833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
47933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
48033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
481bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator ExpandedFunctionArgIter =
482bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      ExpandedFunction->arg_begin();
483db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
484bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
485bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
486bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
4875010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
488bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
489900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *InStep  = nullptr;
490900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutStep = nullptr;
491db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
492db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Construct the actual function body.
493bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
494db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
495cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Collect and construct the arguments for the kernel().
496db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Note that we load any loop-invariant arguments before entering the Loop.
497bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
498db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
499900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Type  *InTy      = nullptr;
500900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *InBasePtr = nullptr;
501d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
50298137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar      llvm::Value *InsBasePtr  = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldInPtr, "inputs_base");
503e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
50498137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar      llvm::Value *InStepsBase = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldInStride, "insteps_base");
505e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
50698137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar      llvm::Value    *InStepAddr = Builder.CreateConstInBoundsGEP2_32(nullptr, InStepsBase, 0, 0);
507e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
508e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                      "instep_addr");
509e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
510bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      InTy = (FunctionArgIter++)->getType();
511e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      InStep = getStepValue(&DL, InTy, InStepArg);
512e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
5132b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      InStep->setName("instep");
514e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
51598137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar      llvm::Value *InputAddr = Builder.CreateConstInBoundsGEP2_32(nullptr, InsBasePtr, 0, 0);
516e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      InBasePtr = Builder.CreateLoad(InputAddr, "input_base");
517db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
518db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
519900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Type *OutTy = nullptr;
520900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutBasePtr = nullptr;
521d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
522bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      OutTy = (FunctionArgIter++)->getType();
523b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
5242b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      OutStep->setName("outstep");
525097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes      OutBasePtr = Builder.CreateLoad(
52698137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar                     Builder.CreateConstInBoundsGEP2_32(nullptr,
52798137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar                         Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldOutPtr),
52898137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar                         0, 0));
529db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
530db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
531900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *UsrData = nullptr;
532d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
533bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
534db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
53598137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar          Builder.CreateStructGEP(nullptr, Arg_p,  RsExpandKernelDriverInfoPfxFieldUsr)), UsrDataTy);
536db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      UsrData->setName("UsrData");
537db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
538db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
53933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    llvm::PHINode *IV;
54033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    createLoop(Builder, Arg_x1, Arg_x2, &IV);
541097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
54233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
54333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
54433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross                           [&FunctionArgIter]() { FunctionArgIter++; });
545db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
546bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(FunctionArgIter == Function->arg_end());
547db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
548cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Populate the actual call to kernel().
549db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::SmallVector<llvm::Value*, 8> RootArgs;
550db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
551900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *InPtr  = nullptr;
552900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutPtr = nullptr;
553db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
554ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    // Calculate the current input and output pointers
55502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    //
556ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    // We always calculate the input/output pointers with a GEP operating on i8
55702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // values and only cast at the very end to OutTy. This is because the step
55802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // between two values is given in bytes.
55902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    //
56002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // TODO: We could further optimize the output by using a GEP operation of
56102f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // type 'OutTy' in cases where the element type of the allocation allows.
56202f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    if (OutBasePtr) {
56302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
56402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      OutOffset = Builder.CreateMul(OutOffset, OutStep);
56502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
56602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
56702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    }
568bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
569ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    if (InBasePtr) {
570ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
571ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      InOffset = Builder.CreateMul(InOffset, InStep);
572ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      InPtr = Builder.CreateGEP(InBasePtr, InOffset);
573ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      InPtr = Builder.CreatePointerCast(InPtr, InTy);
574ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    }
57502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser
576ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    if (InPtr) {
5777ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      RootArgs.push_back(InPtr);
578db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
579db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
58002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    if (OutPtr) {
5817ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      RootArgs.push_back(OutPtr);
582db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
583db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
584db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (UsrData) {
585db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      RootArgs.push_back(UsrData);
586db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
587db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
58833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    RootArgs.append(CalleeArgs.begin(), CalleeArgs.end());
589db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
590bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    Builder.CreateCall(Function, RootArgs);
591db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
5927ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    return true;
5937ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines  }
5947ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
5957ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines  /* Expand a pass-by-value kernel.
5967ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines   */
597bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  bool ExpandKernel(llvm::Function *Function, uint32_t Signature) {
598d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
599bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
6007ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
6017ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // TODO: Refactor this to share functionality with ExpandFunction.
602bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::DataLayout DL(Module);
6037ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
604bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
605bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      createEmptyExpandedFunction(Function->getName());
6067ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
607bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    /*
608bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * Extract the expanded function's parameters.  It is guaranteed by
609bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * createEmptyExpandedFunction that there will be five parameters.
610bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     */
611881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
612881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
613881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
614bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator ExpandedFunctionArgIter =
615bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      ExpandedFunction->arg_begin();
616bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
617bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
618bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
619bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
6205010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
6217ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
6227ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // Construct the actual function body.
623bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
6247ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
62518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Create TBAA meta-data.
626354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
627354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines                 *TBAAAllocation, *TBAAPointer;
628bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDBuilder MDHelper(*Context);
62914588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien
630354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScriptDistinct =
631354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines      MDHelper.createTBAARoot("RenderScript Distinct TBAA");
632354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScript = MDHelper.createTBAANode("RenderScript TBAA",
633354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines        TBAARenderScriptDistinct);
634e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
635e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                       TBAARenderScript);
636e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
637e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                      TBAAAllocation, 0);
638e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
639e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                    TBAARenderScript);
64014588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
64118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
64250f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray    llvm::MDNode *AliasingDomain, *AliasingScope;
64350f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray    AliasingDomain = MDHelper.createAnonymousAliasScopeDomain("RS argument scope domain");
64450f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray    AliasingScope = MDHelper.createAnonymousAliasScope(AliasingDomain, "RS argument scope");
64550f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray
646881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    /*
647881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     * Collect and construct the arguments for the kernel().
648881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     *
649881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     * Note that we load any loop-invariant arguments before entering the Loop.
650881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     */
651881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    size_t NumInputs = Function->arg_size();
6527ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
653881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // No usrData parameter on kernels.
654881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    bccAssert(
655881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
656881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
657881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    llvm::Function::arg_iterator ArgIter = Function->arg_begin();
658881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
659881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Check the return type
660bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Type     *OutTy            = nullptr;
661bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Value    *OutStep          = nullptr;
662bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::LoadInst *OutBasePtr       = nullptr;
663bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Value    *CastedOutBasePtr = nullptr;
664881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
665e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    bool PassOutByPointer = false;
666881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
667d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
668bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *OutBaseTy = Function->getReturnType();
669881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
67074a4b08235990916911b8fe758d656c1171faf26Stephen Hines      if (OutBaseTy->isVoidTy()) {
671e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes        PassOutByPointer = true;
672881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        OutTy = ArgIter->getType();
673881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
674881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        ArgIter++;
675881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        --NumInputs;
67674a4b08235990916911b8fe758d656c1171faf26Stephen Hines      } else {
67774a4b08235990916911b8fe758d656c1171faf26Stephen Hines        // We don't increment Args, since we are using the actual return type.
678881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        OutTy = OutBaseTy->getPointerTo();
67974a4b08235990916911b8fe758d656c1171faf26Stephen Hines      }
680881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
681b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
68274a4b08235990916911b8fe758d656c1171faf26Stephen Hines      OutStep->setName("outstep");
683097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes      OutBasePtr = Builder.CreateLoad(
68498137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar                     Builder.CreateConstInBoundsGEP2_32(nullptr,
68598137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar                         Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldOutPtr),
68698137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar                         0, 0));
687097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
6889c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      if (gEnableRsTbaa) {
6899c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines        OutBasePtr->setMetadata("tbaa", TBAAPointer);
6909c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
69150f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray
69250f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray      OutBasePtr->setMetadata("alias.scope", AliasingScope);
69350f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray
694bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray      CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
69574a4b08235990916911b8fe758d656c1171faf26Stephen Hines    }
69674a4b08235990916911b8fe758d656c1171faf26Stephen Hines
69733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    llvm::PHINode *IV;
69833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    createLoop(Builder, Arg_x1, Arg_x2, &IV);
69933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
70033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
70133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
70233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross                           [&NumInputs]() { --NumInputs; });
70333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
704bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::SmallVector<llvm::Type*,  8> InTypes;
705bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::SmallVector<llvm::Value*, 8> InSteps;
706bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::SmallVector<llvm::Value*, 8> InBasePtrs;
707bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::SmallVector<bool,         8> InIsStructPointer;
708881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
709e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT);
710e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
711e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    if (NumInputs > 0) {
71298137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar      llvm::Value *InsBasePtr  = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldInPtr, "inputs_base");
7137ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
71498137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar      llvm::Value *InStepsBase = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldInStride, "insteps_base");
715881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
716881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes      for (size_t InputIndex = 0; InputIndex < NumInputs;
717881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes           ++InputIndex, ArgIter++) {
718881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
71998137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar          llvm::Value    *InStepAddr = Builder.CreateConstInBoundsGEP2_32(nullptr, InStepsBase, 0, InputIndex);
720881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes          llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
721881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes                                                          "instep_addr");
722881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
723326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes          llvm::Type *InType = ArgIter->getType();
724326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes
725326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes        /*
726326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes         * AArch64 calling dictate that structs of sufficient size get passed by
727326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes         * pointer instead of passed by value.  This, combined with the fact
728326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes         * that we don't allow kernels to operate on pointer data means that if
729326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes         * we see a kernel with a pointer parameter we know that it is struct
730326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes         * input that has been promoted.  As such we don't need to convert its
731326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes         * type to a pointer.  Later we will need to know to avoid a load, so we
732326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes         * save this information in InIsStructPointer.
733326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes         */
734326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes          if (!InType->isPointerTy()) {
735326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes            InType = InType->getPointerTo();
736326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes            InIsStructPointer.push_back(false);
737326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes          } else {
738326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes            InIsStructPointer.push_back(true);
739326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes          }
740326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes
741881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes          llvm::Value *InStep = getStepValue(&DL, InType, InStepArg);
742881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
743881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes          InStep->setName("instep");
744881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
74598137cca7eebca946b869b010fef2821c9bf4971Pirama Arumuga Nainar          llvm::Value    *InputAddr = Builder.CreateConstInBoundsGEP2_32(nullptr, InsBasePtr, 0, InputIndex);
746881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes          llvm::LoadInst *InBasePtr = Builder.CreateLoad(InputAddr,
747881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes                                                         "input_base");
748bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray          llvm::Value    *CastInBasePtr = Builder.CreatePointerCast(InBasePtr,
749bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray                                                                    InType, "casted_in");
750881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes          if (gEnableRsTbaa) {
751881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes            InBasePtr->setMetadata("tbaa", TBAAPointer);
752881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes          }
753881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
75450f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray          InBasePtr->setMetadata("alias.scope", AliasingScope);
75550f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray
756881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes          InTypes.push_back(InType);
757881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes          InSteps.push_back(InStep);
758bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray          InBasePtrs.push_back(CastInBasePtr);
759881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes      }
760881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    }
7617ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
7627ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // Populate the actual call to kernel().
7637ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    llvm::SmallVector<llvm::Value*, 8> RootArgs;
7647ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
7654102bec56151fb5d9c962fb298412f34a6eacaa8Tobias Grosser    // Calculate the current input and output pointers
7667b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser    //
7677b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser    //
768881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // We always calculate the input/output pointers with a GEP operating on i8
769881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // values combined with a multiplication and only cast at the very end to
770881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // OutTy.  This is to account for dynamic stepping sizes when the value
771881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // isn't apparent at compile time.  In the (very common) case when we know
772881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // the step size at compile time, due to haveing complete type information
773881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // this multiplication will optmized out and produces code equivalent to a
774881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // a GEP on a pointer of the correct type.
775881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
776881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Output
777881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
778900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutPtr = nullptr;
779bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    if (CastedOutBasePtr) {
7807b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
781881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
782bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray      OutPtr    = Builder.CreateGEP(CastedOutBasePtr, OutOffset);
783bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
784e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      if (PassOutByPointer) {
785881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        RootArgs.push_back(OutPtr);
786881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes      }
7874102bec56151fb5d9c962fb298412f34a6eacaa8Tobias Grosser    }
7887b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser
789881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Inputs
79074a4b08235990916911b8fe758d656c1171faf26Stephen Hines
791881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    if (NumInputs > 0) {
792881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes      llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1);
793881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
794881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes      for (size_t Index = 0; Index < NumInputs; ++Index) {
795bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray        llvm::Value *InPtr    = Builder.CreateGEP(InBasePtrs[Index], Offset);
796326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes        llvm::Value *Input;
797326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes
798326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes        if (InIsStructPointer[Index]) {
799326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes          Input = InPtr;
800326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes
801326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes        } else {
802326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes          llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
803326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes
804326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes          if (gEnableRsTbaa) {
805326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes            InputLoad->setMetadata("tbaa", TBAAAllocation);
806326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes          }
807881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
80850f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray          InputLoad->setMetadata("alias.scope", AliasingScope);
80950f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray
810326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes          Input = InputLoad;
811881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        }
812881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
813881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        RootArgs.push_back(Input);
8149c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
8157ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
8167ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
81733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    RootArgs.append(CalleeArgs.begin(), CalleeArgs.end());
8187ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
819bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
8207ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
821e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    if (OutPtr && !PassOutByPointer) {
82218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
8239c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      if (gEnableRsTbaa) {
8249c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines        Store->setMetadata("tbaa", TBAAAllocation);
8259c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
82650f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray      Store->setMetadata("alias.scope", AliasingScope);
8277ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
8287ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
829db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return true;
830db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
831db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
83218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// @brief Checks if pointers to allocation internals are exposed
83318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  ///
83418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// This function verifies if through the parameters passed to the kernel
83518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// or through calls to the runtime library the script gains access to
83618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// pointers pointing to data within a RenderScript Allocation.
83718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// If we know we control all loads from and stores to data within
83818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// RenderScript allocations and if we know the run-time internal accesses
83918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// are all annotated with RenderScript TBAA metadata, only then we
84018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// can safely use TBAA to distinguish between generic and from-allocation
84118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// pointers.
842bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  bool allocPointersExposed(llvm::Module &Module) {
84318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Old style kernel function can expose pointers to elements within
84418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // allocations.
84518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // TODO: Extend analysis to allow simple cases of old-style kernels.
84625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    for (size_t i = 0; i < mExportForEachCount; ++i) {
84725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      const char *Name = mExportForEachNameList[i];
84825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      uint32_t Signature = mExportForEachSignatureList[i];
849bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (Module.getFunction(Name) &&
850d88177580db4ddedf680854c51db333c97eabc59Stephen Hines          !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
85118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
85218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
85318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
85418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
85518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Check for library functions that expose a pointer to an Allocation or
85618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // that are not yet annotated with RenderScript-specific tbaa information.
85718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    static std::vector<std::string> Funcs;
85818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
85918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // rsGetElementAt(...)
86018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    Funcs.push_back("_Z14rsGetElementAt13rs_allocationj");
86118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj");
86218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj");
86318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // rsSetElementAt()
86418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj");
86518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj");
86618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj");
86718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // rsGetElementAtYuv_uchar_Y()
86818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj");
86918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // rsGetElementAtYuv_uchar_U()
87018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj");
87118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // rsGetElementAtYuv_uchar_V()
87218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj");
87318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
87418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    for (std::vector<std::string>::iterator FI = Funcs.begin(),
87518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser                                            FE = Funcs.end();
87618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser         FI != FE; ++FI) {
877bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Function *Function = Module.getFunction(*FI);
87818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
879bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (!Function) {
88018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        ALOGE("Missing run-time function '%s'", FI->c_str());
88118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
88218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
88318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
884bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (Function->getNumUses() > 0) {
88518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
88618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
88718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
88818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
88918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    return false;
89018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  }
89118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
89218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
89318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  ///
89418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// The TBAA metadata used to annotate loads/stores from RenderScript
895e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// Allocations is generated in a separate TBAA tree with a
896354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for
897354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// all nodes in unrelated alias analysis trees. This function makes the
898354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root),
899e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With
900e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// the connected trees every access to an Allocation is resolved to
901e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// must-alias if compared to a normal C/C++ access.
902bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
903bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDBuilder MDHelper(*Context);
904354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScriptDistinct =
905354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines      MDHelper.createTBAARoot("RenderScript Distinct TBAA");
906354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode(
907354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines        "RenderScript TBAA", TBAARenderScriptDistinct);
908bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
909354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScript->replaceOperandWith(1, TBAARoot);
91018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  }
91118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
912bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  virtual bool runOnModule(llvm::Module &Module) {
913bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bool Changed  = false;
914bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    this->Module  = &Module;
915bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    this->Context = &Module.getContext();
916bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
917bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    this->buildTypes();
918bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
919bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bcinfo::MetadataExtractor me(&Module);
92025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    if (!me.extract()) {
92125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      ALOGE("Could not extract metadata from module!");
92225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      return false;
92325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    }
92425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachCount = me.getExportForEachSignatureCount();
92525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachNameList = me.getExportForEachNameList();
92625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachSignatureList = me.getExportForEachSignatureList();
927db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
928bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bool AllocsExposed = allocPointersExposed(Module);
92918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
93025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    for (size_t i = 0; i < mExportForEachCount; ++i) {
93125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      const char *name = mExportForEachNameList[i];
93225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      uint32_t signature = mExportForEachSignatureList[i];
933bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Function *kernel = Module.getFunction(name);
934cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser      if (kernel) {
935d88177580db4ddedf680854c51db333c97eabc59Stephen Hines        if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
936cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser          Changed |= ExpandKernel(kernel, signature);
937acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
938acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        } else if (kernel->getReturnType()->isVoidTy()) {
939cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser          Changed |= ExpandFunction(kernel, signature);
940acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
941acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        } else {
942acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // There are some graphics root functions that are not
943acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // expanded, but that will be called directly. For those
944acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // functions, we can not set the linkage to internal.
945acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        }
946cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines      }
947db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
948db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
9499c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines    if (gEnableRsTbaa && !AllocsExposed) {
950bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      connectRenderScriptTBAAMetadata(Module);
95118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
95218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
953cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    return Changed;
954db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
955db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
956db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  virtual const char *getPassName() const {
957db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return "ForEach-able Function Expansion";
958db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
959db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
9607a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao}; // end RSForEachExpandPass
961db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
9627a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace
9637a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
9647a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaochar RSForEachExpandPass::ID = 0;
96533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossstatic llvm::RegisterPass<RSForEachExpandPass> X("foreachexp", "ForEach Expand Pass");
966db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
967db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace bcc {
968db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
9697a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass *
97025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen HinescreateRSForEachExpandPass(bool pEnableStepOpt){
97125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  return new RSForEachExpandPass(pEnableStepOpt);
9727a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao}
973db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
9747a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc
975