1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/*
2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project
3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License");
5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License.
6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at
7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *     http://www.apache.org/licenses/LICENSE-2.0
9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software
11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS,
12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and
14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License.
15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */
16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
17a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "Assert.h"
18a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "Log.h"
19a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "RSTransforms.h"
20a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "RSUtils.h"
21a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet
22a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "bcc/Config.h"
23a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "bcinfo/MetadataExtractor.h"
247a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
2597e50993c70083fdedb4f1dd2c487aa55c6f60cfDavid Gross#include "slang_version.h"
2697e50993c70083fdedb4f1dd2c487aa55c6f60cfDavid Gross
277a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib>
2833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross#include <functional>
29e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross#include <unordered_set>
307a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
31b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DerivedTypes.h>
32b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Function.h>
33b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Instructions.h>
34b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/IRBuilder.h>
3518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser#include <llvm/IR/MDBuilder.h>
36b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Module.h>
37c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h>
387ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines#include <llvm/Support/raw_ostream.h>
39b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DataLayout.h>
40cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser#include <llvm/IR/Function.h>
41b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Type.h>
42806075b3a54af826fea78490fb213d8a0784138eTobias Grosser#include <llvm/Transforms/Utils/BasicBlockUtils.h>
43c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang
#ifndef __DISABLE_ASSERTS
// Expected parameter counts of the expanded functions. They are referenced
// only from bccAssert() checks, so they are compiled out together with the
// asserts.
constexpr int kNumExpandedForeachParams = 4;
constexpr int kNumExpandedReduceAccumulatorParams = 4;
#endif

// Metadata node names (presumably for the RenderScript TBAA root and node;
// their uses are outside this excerpt).
constexpr char kRenderScriptTBAARootName[] = "RenderScript Distinct TBAA";
constexpr char kRenderScriptTBAANodeName[] = "RenderScript TBAA";
52bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
537a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc;
547a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
55db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace {
567a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
// Compile-time switch for RenderScript TBAA (presumably gates emission of the
// TBAA metadata; its uses are outside this excerpt).
constexpr bool gEnableRsTbaa = true;
589c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines
59797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross/* RSKernelExpandPass
60797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross *
61797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * This pass generates functions used to implement calls via
62797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * rsForEach(), "foreach_<NAME>", or "reduce_<NAME>". We create an
63797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * inner loop for the function to be invoked over the appropriate data
64797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * cells of the input/output allocations (adjusting other relevant
65797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * parameters as we go). We support doing this for any forEach or
66797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * reduce style compute kernels.
67797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross *
68797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * In the case of a foreach kernel or a simple reduction kernel, the
69797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * new function name is the original function name "<NAME>" followed
70797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * by ".expand" -- "<NAME>.expand".
71797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross *
72797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * In the case of a general reduction kernel, the kernel's accumulator
73797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * function is the one transformed, and the new function name is the
74797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * original accumulator function name "<ACCUMFN>" followed by
75797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * ".expand" -- "<ACCUMFN>.expand". Using the name "<ACCUMFN>.expand"
76797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * for the function generated from the accumulator should not
77797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * introduce any possibility for name clashes today: The accumulator
78797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * function <ACCUMFN> must be static, so it cannot also serve as a
79797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * foreach kernel; and the code for <ACCUMFN>.expand depends only on
80797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * <ACCUMFN>, not on any other properties of the reduction kernel, so
81797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * any reduction kernels that share the accumulator <ACCUMFN> can
82797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * share <ACCUMFN>.expand also.
83797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross *
84797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * Note that this pass does not delete the original function <NAME> or
85797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * <ACCUMFN>. However, if it is inlined into the newly-generated
86797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * function and not otherwise referenced, then a subsequent pass may
87797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * delete it.
887a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */
894e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaclass RSKernelExpandPass : public llvm::ModulePass {
9033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grosspublic:
91db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  static char ID;
92db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
9333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossprivate:
9497e50993c70083fdedb4f1dd2c487aa55c6f60cfDavid Gross  static const size_t RS_KERNEL_INPUT_LIMIT = 8;  // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h
95e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
96e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  typedef std::unordered_set<llvm::Function *> FunctionSet;
97e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
98e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  enum RsLaunchDimensionsField {
99e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldX,
100e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldY,
101e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldZ,
102e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldLod,
103e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldFace,
104e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldArray,
105e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
106e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldCount
107e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  };
108e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
109e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  enum RsExpandKernelDriverInfoPfxField {
110e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldInPtr,
111e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldInStride,
112e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldInLen,
113e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldOutPtr,
114e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldOutStride,
115e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldOutLen,
116e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldDim,
117e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldCurrent,
118e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldUsr,
119e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldUsLenr,
120e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
121e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldCount
122e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  };
12333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
124bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::Module *Module;
125bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::LLVMContext *Context;
126bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
  /*
   * Pointers to LLVM type information for the function signatures
   * for expanded functions. These must be re-calculated for each module
   * the pass is run on.
   */
1329fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross  llvm::FunctionType *ExpandedForEachType;
133e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  llvm::Type *RsExpandKernelDriverInfoPfxTy;
134db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
13597e50993c70083fdedb4f1dd2c487aa55c6f60cfDavid Gross  // Initialized when we begin to process each Module
13697e50993c70083fdedb4f1dd2c487aa55c6f60cfDavid Gross  bool mStructExplicitlyPaddedBySlang;
13725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  uint32_t mExportForEachCount;
13825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  const char **mExportForEachNameList;
13925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  const uint32_t *mExportForEachSignatureList;
140cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines
1412b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // Turns on optimization of allocation stride values.
1422b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  bool mEnableStepOpt;
1432b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
144bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  uint32_t getRootSignature(llvm::Function *Function) {
145db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    const llvm::NamedMDNode *ExportForEachMetadata =
146bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes        Module->getNamedMetadata("#rs_export_foreach");
147db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
148db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (!ExportForEachMetadata) {
149db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
150bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      for (llvm::Function::arg_iterator B = Function->arg_begin(),
151bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                                        E = Function->arg_end();
152db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           B != E;
153db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           ++B) {
154db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        RootArgTys.push_back(B->getType());
155db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
156db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
157db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // For pre-ICS bitcode, we may not have signature information. In that
158db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // case, we use the size of the RootArgTys to select the number of
159db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // arguments.
160db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      return (1 << RootArgTys.size()) - 1;
161db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
162db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1637ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    if (ExportForEachMetadata->getNumOperands() == 0) {
1647ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      return 0;
1657ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
1667ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1676e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
168db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
169cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // We only handle the case for legacy root() functions here, so this is
170cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // hard-coded to look at only the first such function.
171db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
172900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    if (SigNode != nullptr && SigNode->getNumOperands() == 1) {
1731bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines      llvm::Metadata *SigMD = SigNode->getOperand(0);
1741bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines      if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) {
1751bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines        llvm::StringRef SigString = SigS->getString();
176db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        uint32_t Signature = 0;
177db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        if (SigString.getAsInteger(10, Signature)) {
178db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
179db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          return 0;
180db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        }
181db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        return Signature;
182db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
183db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
184db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
185db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return 0;
186db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
187db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
188429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray  bool isStepOptSupported(llvm::Type *AllocType) {
189429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
190429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
191429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
192429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
193429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (mEnableStepOpt) {
194429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
195429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
196429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
197429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType == VoidPtrTy) {
198429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
199429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
200429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
201429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (!PT) {
202429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
203429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
204429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
205429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // remaining conditions are 64-bit only
206429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (VoidPtrTy->getPrimitiveSizeInBits() == 32) {
207429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return true;
208429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
209429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
210429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // coerce suggests an upconverted struct type, which we can't support
211429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) {
212429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
213429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
214429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
215429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported
216429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2);
217429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128);
218429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType == V2xi64Ty || AllocType == Int128Ty) {
219429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
220429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
221429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
222429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    return true;
223429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray  }
224429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
2252b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // Get the actual value we should use to step through an allocation.
2267b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  //
2277b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // Normally the value we use to step through an allocation is given to us by
2287b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // the driver. However, for certain primitive data types, we can derive an
2297b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // integer constant for the step value. We use this integer constant whenever
2307b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // possible to allow further compiler optimizations to take place.
2317b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  //
232b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines  // DL - Target Data size/layout information.
2332b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // T - Type of allocation (should be a pointer).
2342b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // OrigStep - Original step increment (root.expand() input from driver).
235bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
2362b04086acbef6520ae2c54a868b1271abf053122Stephen Hines                            llvm::Value *OrigStep) {
237b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines    bccAssert(DL);
238bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(AllocType);
2392b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    bccAssert(OrigStep);
240bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
241429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (isStepOptSupported(AllocType)) {
2422b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      llvm::Type *ET = PT->getElementType();
243b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      uint64_t ETSize = DL->getTypeAllocSize(ET);
244bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
2452b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return llvm::ConstantInt::get(Int32Ty, ETSize);
2462b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    } else {
2472b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return OrigStep;
2482b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    }
2492b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  }
2502b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
251097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes  /// Builds the types required by the pass for the given context.
252bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  void buildTypes(void) {
253e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs.
254bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
255e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8Ty                   = llvm::Type::getInt8Ty(*Context);
256e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8PtrTy                = Int8Ty->getPointerTo();
257e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT);
258e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32Ty                  = llvm::Type::getInt32Ty(*Context);
259e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32ArrayInputLimitTy   = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT);
260e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *VoidPtrTy                = llvm::Type::getInt8PtrTy(*Context);
261e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32Array4Ty            = llvm::ArrayType::get(Int32Ty, 4);
262097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
263097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes    /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h:
264db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *
265e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * struct RsLaunchDimensions {
266e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t x;
267db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t y;
268db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t z;
269e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t lod;
270e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t face;
271e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t array[4];
272e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * };
273e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     */
274e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes;
275e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t x
276e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t y
277e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t z
278e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t lod
279e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t face
280e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4]
281e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::StructType *RsLaunchDimensionsTy =
282e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions");
283e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
2841d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross    /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h:
285e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
286e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * struct RsExpandKernelDriverInfoPfx {
287e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT];
288e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t inStride[RS_KERNEL_INPUT_LIMIT];
289e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t inLen;
290e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
291e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT];
292e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t outStride[RS_KERNEL_INPUT_LIMIT];
293e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t outLen;
294e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
295e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // Dimension of the launch
296e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     RsLaunchDimensions dim;
297e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
298e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // The walking iterator of the launch
299e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     RsLaunchDimensions current;
300e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
301e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     const void *usr;
302e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t usrLen;
303e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
304e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // Items below this line are not used by the compiler and can be change in the driver.
305e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // So the compiler must assume there are an unknown number of fields of unknown type
306e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // beginning here.
307db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     * };
3081d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross     *
3091d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross     * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp).
310db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     */
311e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes;
312e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]
313e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t inStride[RS_KERNEL_INPUT_LIMIT]
314e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t inLen
315e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]
316e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t outStride[RS_KERNEL_INPUT_LIMIT]
317e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t outLen
318e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions dim
319e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions current
320e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy);                // const void *usr
321e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t usrLen
322e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    RsExpandKernelDriverInfoPfxTy =
323e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx");
324bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
325bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    // Create the function type for expanded kernels.
3264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
327bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
328e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo();
3294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // void (const RsExpandKernelDriverInfoPfxTy *p, uint32_t x1, uint32_t x2, uint32_t outstep)
3304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    ExpandedForEachType = llvm::FunctionType::get(VoidTy,
3314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        {RsExpandKernelDriverInfoPfxPtrTy, Int32Ty, Int32Ty, Int32Ty}, false);
3328ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  }
3338ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
3344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  /// @brief Create skeleton of the expanded foreach kernel.
335357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///
336357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  /// This creates a function with the following signature:
337357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///
338357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
3395010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes  ///         uint32_t outstep)
340357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///
3414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  llvm::Function *createEmptyExpandedForEachKernel(llvm::StringRef OldName) {
342bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
3434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      llvm::Function::Create(ExpandedForEachType,
344bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                             llvm::GlobalValue::ExternalLinkage,
345bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                             OldName + ".expand", Module);
3464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
347bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
348bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("p");
349bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("x1");
350bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("x2");
351bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("arg_outstep");
3524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
3534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                                                       ExpandedFunction);
3544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::IRBuilder<> Builder(Begin);
3554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    Builder.CreateRetVoid();
3564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    return ExpandedFunction;
3574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  }
3584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
359e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Create skeleton of a general reduce kernel's expanded accumulator.
360e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
361e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // This creates a function with the following signature:
362e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
363e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //  void @func.expand(%RsExpandKernelDriverInfoPfx* nocapture %p,
364e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                    i32 %x1, i32 %x2, accumType* nocapture %accum)
365e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
3669fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross  llvm::Function *createEmptyExpandedReduceAccumulator(llvm::StringRef OldName,
3679fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross                                                       llvm::Type *AccumArgTy) {
368e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
369e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
3709fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross    llvm::FunctionType *ExpandedReduceAccumulatorType =
371e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        llvm::FunctionType::get(VoidTy,
372e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                {RsExpandKernelDriverInfoPfxTy->getPointerTo(),
373e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 Int32Ty, Int32Ty, AccumArgTy}, false);
374e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function *FnExpandedAccumulator =
3759fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross      llvm::Function::Create(ExpandedReduceAccumulatorType,
376e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                             llvm::GlobalValue::ExternalLinkage,
377e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                             OldName + ".expand", Module);
3789fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross    bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams);
379e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
380e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function::arg_iterator AI = FnExpandedAccumulator->arg_begin();
381e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
382e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    using llvm::Attribute;
383e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
384e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_p = &(*AI++);
385e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_p->setName("p");
386e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_p->addAttr(llvm::AttributeSet::get(*Context, Arg_p->getArgNo() + 1,
387e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                           llvm::makeArrayRef(Attribute::NoCapture)));
388e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
389e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_x1 = &(*AI++);
390e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_x1->setName("x1");
391e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
392e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_x2 = &(*AI++);
393e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_x2->setName("x2");
394e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
395e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_accum = &(*AI++);
396e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_accum->setName("accum");
397e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_accum->addAttr(llvm::AttributeSet::get(*Context, Arg_accum->getArgNo() + 1,
398e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                               llvm::makeArrayRef(Attribute::NoCapture)));
399e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
400e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
401e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                       FnExpandedAccumulator);
402e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::IRBuilder<> Builder(Begin);
403e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.CreateRetVoid();
404e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
405e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    return FnExpandedAccumulator;
406e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
407e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
408e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @brief Create an empty loop
409e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
410e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// Create a loop of the form:
411e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
412e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// for (i = LowerBound; i < UpperBound; i++)
413e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///   ;
414e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
415e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// After the loop has been created, the builder is set such that
416e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// instructions can be added to the loop body.
417e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
418e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param Builder The builder to use to build this loop. The current
419e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///                position of the builder is the position the loop
420e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///                will be inserted.
421e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param LowerBound The first value of the loop iterator
422e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param UpperBound The maximal value of the loop iterator
423e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param LoopIV A reference that will be set to the loop iterator.
424e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @return The BasicBlock that will be executed after the loop.
425e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
426e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::Value *LowerBound,
427e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::Value *UpperBound,
4284165d29822fc7caf81e435995ff6189608fc0323Dean De Leo                               llvm::Value **LoopIV) {
429c2ca742d7d0197c52e49467862844463fb42280fDavid Gross    bccAssert(LowerBound->getType() == UpperBound->getType());
430e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
431e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
4324165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    llvm::Value *Cond, *IVNext, *IV, *IVVar;
433e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
434e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    CondBB = Builder.GetInsertBlock();
435f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar    AfterBB = llvm::SplitBlock(CondBB, &*Builder.GetInsertPoint(), nullptr, nullptr);
436bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
437e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
4384165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    CondBB->getTerminator()->eraseFromParent();
4394165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    Builder.SetInsertPoint(CondBB);
4404165d29822fc7caf81e435995ff6189608fc0323Dean De Leo
4414165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    // decltype(LowerBound) *ivvar = alloca(sizeof(int))
4424165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    // *ivvar = LowerBound
4434165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    IVVar = Builder.CreateAlloca(LowerBound->getType(), nullptr, BCC_INDEX_VAR_NAME);
4444165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    Builder.CreateStore(LowerBound, IVVar);
4454165d29822fc7caf81e435995ff6189608fc0323Dean De Leo
446e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // if (LowerBound < Upperbound)
447e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto LoopHeader
448e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // else
449e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto AfterBB
450e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser    Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
451e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
452e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
4534165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    // LoopHeader:
4544165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //   iv = *ivvar
4554165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //   <insertion point here>
4564165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //   iv.next = iv + 1
4574165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //   *ivvar = iv.next
4584165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //   if (iv.next < Upperbound)
4594165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //     goto LoopHeader
4604165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //   else
4614165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //     goto AfterBB
4624165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    // AfterBB:
463e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.SetInsertPoint(HeaderBB);
4644165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    IV = Builder.CreateLoad(IVVar, "X");
465e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
4664165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    Builder.CreateStore(IVNext, IVVar);
467e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser    Cond = Builder.CreateICmpULT(IVNext, UpperBound);
468e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
469e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    AfterBB->setName("Exit");
4704165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    Builder.SetInsertPoint(llvm::cast<llvm::Instruction>(IVNext));
4714165d29822fc7caf81e435995ff6189608fc0323Dean De Leo
4724165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    // Record information about this loop.
473e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    *LoopIV = IV;
474e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    return AfterBB;
475e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  }
476e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
47728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Finish building the outgoing argument list for calling a ForEach-able function.
47828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //
47928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // ArgVector - on input, the non-special arguments
48028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //             on output, the non-special arguments combined with the special arguments
48128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //               from SpecialArgVector
48228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // SpecialArgVector - special arguments (from ExpandSpecialArguments())
48328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // SpecialArgContextIdx - return value of ExpandSpecialArguments()
48428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //                          (position of context argument in SpecialArgVector)
48528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // CalleeFunction - the ForEach-able function being called
48628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Builder - for inserting code into the caller function
48728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  template<unsigned int ArgVectorLen, unsigned int SpecialArgVectorLen>
48828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  void finishArgList(      llvm::SmallVector<llvm::Value *, ArgVectorLen>        &ArgVector,
48928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const llvm::SmallVector<llvm::Value *, SpecialArgVectorLen> &SpecialArgVector,
49028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const int SpecialArgContextIdx,
49128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const llvm::Function &CalleeFunction,
49228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     llvm::IRBuilder<> &CallerBuilder) {
49328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    /* The context argument (if any) is a pointer to an opaque user-visible type that differs from
49428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * the RsExpandKernelDriverInfoPfx type used in the function we are generating (although the
49528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * two types represent the same thing).  Therefore, we must introduce a pointer cast when
49628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * generating a call to the kernel function.
49728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     */
49828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    const int ArgContextIdx =
49928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        SpecialArgContextIdx >= 0 ? (ArgVector.size() + SpecialArgContextIdx) : SpecialArgContextIdx;
50028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    ArgVector.append(SpecialArgVector.begin(), SpecialArgVector.end());
50128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    if (ArgContextIdx >= 0) {
50228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      llvm::Type *ContextArgType = nullptr;
50328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      int ArgIdx = ArgContextIdx;
50428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      for (const auto &Arg : CalleeFunction.getArgumentList()) {
50528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        if (!ArgIdx--) {
50628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross          ContextArgType = Arg.getType();
50728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross          break;
50828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        }
50928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      }
51028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      bccAssert(ContextArgType);
51128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      ArgVector[ArgContextIdx] = CallerBuilder.CreatePointerCast(ArgVector[ArgContextIdx], ContextArgType);
51228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    }
51328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  }
51428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
515083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // GEPHelper() returns a SmallVector of values suitable for passing
516083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // to IRBuilder::CreateGEP(), and SmallGEPIndices is a typedef for
517083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // the returned data type. It is sized so that the SmallVector
518083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // returned by GEPHelper() never needs to do a heap allocation for
519083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // any list of GEP indices it encounters in the code.
520083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  typedef llvm::SmallVector<llvm::Value *, 3> SmallGEPIndices;
521083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
522083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Helper for turning a list of constant integer GEP indices into a
523083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // SmallVector of llvm::Value*. The return value is suitable for
524083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // passing to a GetElementPtrInst constructor or IRBuilder::CreateGEP().
525083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //
526083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Inputs:
527083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   I32Args should be integers which represent the index arguments
528083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   to a GEP instruction.
529083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //
530083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Returns:
531083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   Returns a SmallVector of ConstantInts.
5324e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  SmallGEPIndices GEPHelper(const std::initializer_list<int32_t> I32Args) {
533083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    SmallGEPIndices Out(I32Args.size());
534083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::IntegerType *I32Ty = llvm::Type::getInt32Ty(*Context);
535083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    std::transform(I32Args.begin(), I32Args.end(), Out.begin(),
536083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                   [I32Ty](int32_t Arg) { return llvm::ConstantInt::get(I32Ty, Arg); });
537083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    return Out;
538083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  }
539083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
5408ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosserpublic:
5417e920a716693033edf32a6fedd03798bbfbd85ebChih-Hung Hsieh  explicit RSKernelExpandPass(bool pEnableStepOpt = true)
542900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes      : ModulePass(ID), Module(nullptr), Context(nullptr),
543bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes        mEnableStepOpt(pEnableStepOpt) {
544bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
5458ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  }
5468ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
547c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
548c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines    // This pass does not use any other analysis passes, but it does
549c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines    // add/wrap the existing functions in the module (thus altering the CFG).
550c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines  }
551c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines
55233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Build contribution to outgoing argument list for calling a
553e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // ForEach-able function or a general reduction accumulator
554e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // function, based on the special parameters of that function.
55533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  //
556e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Signature - metadata bits for the signature of the callee
55733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // X, Arg_p - values derived directly from expanded function,
558e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //            suitable for computing arguments for the callee
55933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // CalleeArgs - contribution is accumulated here
56033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Bump - invoked once for each contributed outgoing argument
561083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // LoopHeaderInsertionPoint - an Instruction in the loop header, before which
562083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //                            this function can insert loop-invariant loads
56328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //
56428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Return value is the (zero-based) position of the context (Arg_p)
56528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // argument in the CalleeArgs vector, or a negative value if the
56628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // context argument is not placed in the CalleeArgs vector.
56728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  int ExpandSpecialArguments(uint32_t Signature,
56828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::Value *X,
56928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::Value *Arg_p,
57028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::IRBuilder<> &Builder,
57128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::SmallVector<llvm::Value*, 8> &CalleeArgs,
5728a019dd0040bedf5078e4d18e06a244a675b80e8Chih-Hung Hsieh                             const std::function<void ()> &Bump,
573083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             llvm::Instruction *LoopHeaderInsertionPoint) {
57428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
57528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    bccAssert(CalleeArgs.empty());
57628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
57728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    int Return = -1;
57833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) {
57933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      CalleeArgs.push_back(Arg_p);
58033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      Bump();
58128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      Return = CalleeArgs.size() - 1;
58233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
58333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
58433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
58533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      CalleeArgs.push_back(X);
58633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      Bump();
58733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
58833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
589e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) ||
590e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
591083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      bccAssert(LoopHeaderInsertionPoint);
59233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
593083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // Y and Z are loop invariant, so they can be hoisted out of the
594083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // loop. Set the IRBuilder insertion point to the loop header.
595083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      auto OldInsertionPoint = Builder.saveIP();
596083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.SetInsertPoint(LoopHeaderInsertionPoint);
597e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
598e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
599083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices YValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
600083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          RsLaunchDimensionsFieldY}));
601083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *YAddr = Builder.CreateInBoundsGEP(Arg_p, YValueGEP, "Y.gep");
602083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        CalleeArgs.push_back(Builder.CreateLoad(YAddr, "Y"));
603e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        Bump();
604e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      }
605e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
606e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
607083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices ZValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
608083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          RsLaunchDimensionsFieldZ}));
609083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *ZAddr = Builder.CreateInBoundsGEP(Arg_p, ZValueGEP, "Z.gep");
610083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        CalleeArgs.push_back(Builder.CreateLoad(ZAddr, "Z"));
611e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        Bump();
612e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      }
613083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
614083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.restoreIP(OldInsertionPoint);
61533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
61628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
61728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    return Return;
61833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  }
61933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
620e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Generate loop-invariant input processing setup code for an expanded
621e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // ForEach-able function or an expanded general reduction accumulator
622e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // function.
623e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
624e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // LoopHeader - block at the end of which the setup code will be inserted
625e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Arg_p - RSKernelDriverInfo pointer passed to the expanded function
626e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // TBAAPointer - metadata for marking loads of pointer values out of RSKernelDriverInfo
627e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // ArgIter - iterator pointing to first input of the UNexpanded function
628e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // NumInputs - number of inputs (NOT number of ARGUMENTS)
629e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
630f039d98d346006442b61255a2889b8513a8cd56fYong Chen  // InTypes[] - this function saves input type, they will be used in ExpandInputsBody().
631f039d98d346006442b61255a2889b8513a8cd56fYong Chen  // InBufPtrs[] - this function sets each array element to point to the first cell / byte
632f039d98d346006442b61255a2889b8513a8cd56fYong Chen  //               (byte for x86, cell for other platforms) of the corresponding input allocation
633e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // InStructTempSlots[] - this function sets each array element either to nullptr
634e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       or to the result of an alloca (for the case where the
635e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       calling convention dictates that a value must be passed
636e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       by reference, and so we need a stacked temporary to hold
637e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       a copy of that value)
638e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  void ExpandInputsLoopInvariant(llvm::IRBuilder<> &Builder, llvm::BasicBlock *LoopHeader,
639e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::Value *Arg_p,
640e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::MDNode *TBAAPointer,
641e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::Function::arg_iterator ArgIter,
642e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 const size_t NumInputs,
643f039d98d346006442b61255a2889b8513a8cd56fYong Chen                                 llvm::SmallVectorImpl<llvm::Type *> &InTypes,
644e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
645e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots) {
646e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT);
647e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
648e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Extract information about input slots. The work done
649e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // here is loop-invariant, so we can hoist the operations out of the loop.
650e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    auto OldInsertionPoint = Builder.saveIP();
651e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.SetInsertPoint(LoopHeader->getTerminator());
652e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
653e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    for (size_t InputIndex = 0; InputIndex < NumInputs; ++InputIndex, ArgIter++) {
654e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::Type *InType = ArgIter->getType();
655e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
656e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      /*
657e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * AArch64 calling conventions dictate that structs of sufficient size
658e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * get passed by pointer instead of passed by value.  This, combined
659e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * with the fact that we don't allow kernels to operate on pointer
660e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * data means that if we see a kernel with a pointer parameter we know
661e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * that it is a struct input that has been promoted.  As such we don't
662e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * need to convert its type to a pointer.  Later we will need to know
663e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * to create a temporary copy on the stack, so we save this information
664e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * in InStructTempSlots.
665e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       */
666e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) {
667e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        llvm::Type *ElementType = PtrType->getElementType();
668e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr,
669e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                         "input_struct_slot"));
670e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      } else {
671e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InType = InType->getPointerTo();
672e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InStructTempSlots.push_back(nullptr);
673e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
674e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
675e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      SmallGEPIndices InBufPtrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr,
676e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                             static_cast<int32_t>(InputIndex)}));
677e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::Value    *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep");
678e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf");
679f039d98d346006442b61255a2889b8513a8cd56fYong Chen
680f039d98d346006442b61255a2889b8513a8cd56fYong Chen      llvm::Value *CastInBufPtr = nullptr;
68197e50993c70083fdedb4f1dd2c487aa55c6f60cfDavid Gross      if (mStructExplicitlyPaddedBySlang || (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING)) {
682f039d98d346006442b61255a2889b8513a8cd56fYong Chen        CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in");
683f039d98d346006442b61255a2889b8513a8cd56fYong Chen      } else {
684f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // The disagreement between module and x86 target machine datalayout
685f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // causes mismatched input/output data offset between slang reflected
686f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // code and bcc codegen for GetElementPtr. To solve this issue, skip the
687f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // cast to InType and leave CastInBufPtr as an int8_t*.  The buffer is
688f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // later indexed with an explicit byte offset computed based on
68997e50993c70083fdedb4f1dd2c487aa55c6f60cfDavid Gross        // X86_CUSTOM_DL_STRING and then bitcast to actual input type.
690f039d98d346006442b61255a2889b8513a8cd56fYong Chen        CastInBufPtr = InBufPtr;
691f039d98d346006442b61255a2889b8513a8cd56fYong Chen      }
692e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
693e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (gEnableRsTbaa) {
694e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InBufPtr->setMetadata("tbaa", TBAAPointer);
695e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
696e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
697f039d98d346006442b61255a2889b8513a8cd56fYong Chen      InTypes.push_back(InType);
698e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      InBufPtrs.push_back(CastInBufPtr);
699e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
700e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
701e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.restoreIP(OldInsertionPoint);
702e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
703e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
704e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Generate loop-varying input processing code for an expanded ForEach-able function
705e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // or an expanded general reduction accumulator function.  Also, for the call to the
706e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // UNexpanded function, collect the portion of the argument list corresponding to the
707e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // inputs.
708e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
709e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Arg_x1 - first X coordinate to be processed by the expanded function
710e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // TBAAAllocation - metadata for marking loads of input values out of allocations
711e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // NumInputs -- number of inputs (NOT number of ARGUMENTS)
712f039d98d346006442b61255a2889b8513a8cd56fYong Chen  // InTypes[] - the input pointer types saved by ExpandInputsLoopInvariant(); this function
713f039d98d346006442b61255a2889b8513a8cd56fYong Chen  //             uses them to cast a byte-addressed InPtr back to its real pointer type.
714e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // InBufPtrs[] - this function consumes the information produced by ExpandInputsLoopInvariant()
715e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // InStructTempSlots[] - this function consumes the information produced by ExpandInputsLoopInvariant()
716e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // IndVar - value of loop induction variable (X coordinate) for a given loop iteration
717e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
718e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // RootArgs - this function sets this to the list of outgoing argument values corresponding
719e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //            to the inputs
720e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  void ExpandInputsBody(llvm::IRBuilder<> &Builder,
721e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::Value *Arg_x1,
722e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::MDNode *TBAAAllocation,
723e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        const size_t NumInputs,
724f039d98d346006442b61255a2889b8513a8cd56fYong Chen                        const llvm::SmallVectorImpl<llvm::Type *> &InTypes,
725e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        const llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
726e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        const llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots,
727e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::Value *IndVar,
728e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::SmallVectorImpl<llvm::Value *> &RootArgs) {
    // Element index of the current iteration, counted from the first X
    // coordinate handled by the expanded function (IndVar - Arg_x1).
729e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Offset = Builder.CreateSub(IndVar, Arg_x1);
    // Only used on the x86 byte-offset path below, as the type of the
    // constant element size that Offset is multiplied by.
730f039d98d346006442b61255a2889b8513a8cd56fYong Chen    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
731e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
    // One load (and possibly one store to a temporary) is emitted per input;
    // exactly one value per input is appended to RootArgs, in input order.
732e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    for (size_t Index = 0; Index < NumInputs; ++Index) {
733e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
734f039d98d346006442b61255a2889b8513a8cd56fYong Chen      llvm::Value *InPtr = nullptr;
      // Same condition as in ExpandInputsLoopInvariant(): when it holds, the
      // buffer pointer was already cast to the input type there, so a typed
      // GEP with the element index addresses the current input directly.
73597e50993c70083fdedb4f1dd2c487aa55c6f60cfDavid Gross      if (mStructExplicitlyPaddedBySlang || (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING)) {
736f039d98d346006442b61255a2889b8513a8cd56fYong Chen        InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset);
737f039d98d346006442b61255a2889b8513a8cd56fYong Chen      } else {
738f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // Treat x86 input buffer as byte[], get indexed pointer with explicit
739f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // byte offset computed using a datalayout based on
740f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // X86_CUSTOM_DL_STRING, then bitcast it to actual input type.
741f039d98d346006442b61255a2889b8513a8cd56fYong Chen        llvm::DataLayout DL(X86_CUSTOM_DL_STRING);
742f039d98d346006442b61255a2889b8513a8cd56fYong Chen        llvm::Type *InTy = InTypes[Index];
        // InTy is a pointer type; the per-element step is the alloc size of
        // its pointee under the custom x86 data layout.
743f039d98d346006442b61255a2889b8513a8cd56fYong Chen        uint64_t InStep = DL.getTypeAllocSize(InTy->getPointerElementType());
744f039d98d346006442b61255a2889b8513a8cd56fYong Chen        llvm::Value *OffsetInBytes = Builder.CreateMul(Offset, llvm::ConstantInt::get(Int32Ty, InStep));
745f039d98d346006442b61255a2889b8513a8cd56fYong Chen        InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], OffsetInBytes);
746f039d98d346006442b61255a2889b8513a8cd56fYong Chen        InPtr = Builder.CreatePointerCast(InPtr, InTy);
747f039d98d346006442b61255a2889b8513a8cd56fYong Chen      }
748f039d98d346006442b61255a2889b8513a8cd56fYong Chen
      // Input is assigned on both branches of the temporary-slot check below.
749f039d98d346006442b61255a2889b8513a8cd56fYong Chen      llvm::Value *Input;
750e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
751e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
752e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (gEnableRsTbaa) {
753e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InputLoad->setMetadata("tbaa", TBAAAllocation);
754e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
755e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
      // A non-null slot was recorded by ExpandInputsLoopInvariant() only for
      // pointer-typed kernel parameters (struct inputs passed by pointer).
756e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) {
757e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // Pass a pointer to a temporary on the stack, rather than
758e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // passing a pointer to the original value. We do not want
759e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // the kernel to potentially modify the input data.
760e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
761e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // Note: don't annotate with TBAA, since the kernel might
762e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // have its own TBAA annotations for the pointer argument.
763e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        Builder.CreateStore(InputLoad, TemporarySlot);
764e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        Input = TemporarySlot;
765e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      } else {
        // By-value input: the loaded value itself is the argument.
766e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        Input = InputLoad;
767e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
768e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
769e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      RootArgs.push_back(Input);
770e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
771e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
772e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
7738ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  /* Performs the actual optimization on a selected function. On success, the
7748ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   * Module will contain a new function of the name "<NAME>.expand" that
7758ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   * invokes <NAME>() in a loop with the appropriate parameters.
7768ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   */
7774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  bool ExpandOldStyleForEach(llvm::Function *Function, uint32_t Signature) {
778bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    ALOGV("Expanding ForEach-able Function %s",
779bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes          Function->getName().str().c_str());
7808ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
    // A zero Signature means "not supplied": try to derive one from the
    // function itself, and give up (return false) if that also yields zero.
7818ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser    if (!Signature) {
782bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      Signature = getRootSignature(Function);
7838ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser      if (!Signature) {
7848ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        // We couldn't determine how to expand this function based on its
7858ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        // function signature.
7868ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        return false;
7878ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser      }
7888ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser    }
7898ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
    // Start from the module's data layout; for the default x86 triple without
    // slang-inserted struct padding, switch to the custom x86 layout string.
790bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::DataLayout DL(Module);
79197e50993c70083fdedb4f1dd2c487aa55c6f60cfDavid Gross    if (!mStructExplicitlyPaddedBySlang && (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING)) {
792f039d98d346006442b61255a2889b8513a8cd56fYong Chen      DL.reset(X86_CUSTOM_DL_STRING);
793f039d98d346006442b61255a2889b8513a8cd56fYong Chen    }
7948ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
795bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
7964e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      createEmptyExpandedForEachKernel(Function->getName());
797db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
798bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    /*
799bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * Extract the expanded function's parameters.  It is guaranteed by
800e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross     * createEmptyExpandedForEachKernel that there will be four parameters.
801bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     */
80233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
8034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
80433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
805bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator ExpandedFunctionArgIter =
806bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      ExpandedFunction->arg_begin();
807db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
    // Parameters are taken positionally: driver-info pointer, first X,
    // end X, and the output step.
808bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
809bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
810bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
8115010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
812bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
813900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *InStep  = nullptr;
814900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutStep = nullptr;
815db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
816db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Construct the actual function body.
    // The builder is positioned at the first instruction of the expanded
    // function's entry block.
817f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar    llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin());
818db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
819cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Collect and construct the arguments for the kernel().
820db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Note that we load any loop-invariant arguments before entering the Loop.
    // FunctionArgIter walks the UNexpanded function's parameters; each
    // signature-selected section below consumes one of them, in the order
    // in, out, usrData.
821bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
822db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
823900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Type  *InTy      = nullptr;
824083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::Value *InBufPtr = nullptr;
825d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
      // Load the stride of input 0 from the driver-info structure.
826083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 0}));
827083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      llvm::LoadInst *InStepArg  = Builder.CreateLoad(
828083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"), "instep_addr");
829e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
830bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      InTy = (FunctionArgIter++)->getType();
      // NOTE(review): getStepValue() is defined elsewhere; presumably it
      // yields the per-element step in bytes (the GEPs below use it as a
      // byte multiplier) -- confirm against its definition.
831e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      InStep = getStepValue(&DL, InTy, InStepArg);
832e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
8332b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      InStep->setName("instep");
834e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
      // Load the base pointer of input buffer 0.
835083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices InputAddrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 0}));
836083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      InBufPtr = Builder.CreateLoad(
837083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        Builder.CreateInBoundsGEP(Arg_p, InputAddrGEP, "input_buf.gep"), "input_buf");
838db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
839db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
840900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Type *OutTy = nullptr;
841900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutBasePtr = nullptr;
842d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
843bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      OutTy = (FunctionArgIter++)->getType();
      // Unlike the input step, the output step comes from the expanded
      // function's own Arg_outstep parameter rather than from *Arg_p.
844b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
8452b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      OutStep->setName("outstep");
      // Load the base pointer of output buffer 0.
846083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
847083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
848db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
849db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
850900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *UsrData = nullptr;
851d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
      // Load the user-data pointer from the driver-info structure and cast
      // it to the type the kernel's parameter expects.
852bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
853083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      llvm::Value *UsrDataPointerAddr = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldUsr);
854083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(UsrDataPointerAddr), UsrDataTy);
855db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      UsrData->setName("UsrData");
856db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
857db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
    // Remember the block the builder is in BEFORE the loop is created; its
    // terminator is handed to ExpandSpecialArguments() below.
858083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
    // NOTE(review): IV is left uninitialized here; createLoop() is expected
    // to set it (it receives &IV) to the loop induction variable, presumably
    // iterating X from Arg_x1 to Arg_x2 -- confirm against createLoop().
8594165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    llvm::Value *IV;
86033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    createLoop(Builder, Arg_x1, Arg_x2, &IV);
861097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
    // The callback advances FunctionArgIter; presumably it is invoked once
    // per special parameter the kernel declares, so that the assertion below
    // can check every parameter of Function has been accounted for.
86233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
86328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    const int CalleeArgsContextIdx = ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
864083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                                                            [&FunctionArgIter]() { FunctionArgIter++; },
865083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                                                            LoopHeader->getTerminator());
866db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
867bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(FunctionArgIter == Function->arg_end());
868db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
869cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Populate the actual call to kernel().
870db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::SmallVector<llvm::Value*, 8> RootArgs;
871db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
872900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *InPtr  = nullptr;
873900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutPtr = nullptr;
874db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
875ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    // Calculate the current input and output pointers
87602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    //
877ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    // We always calculate the input/output pointers with a GEP operating on i8
87802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // values and only cast at the very end to OutTy. This is because the step
87902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // between two values is given in bytes.
88002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    //
88102f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // TODO: We could further optimize the output by using a GEP operation of
88202f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // type 'OutTy' in cases where the element type of the allocation allows.
88302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    if (OutBasePtr) {
      // Byte offset = (IV - Arg_x1) * OutStep, applied to the output base.
88402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
88502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      OutOffset = Builder.CreateMul(OutOffset, OutStep);
886083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutPtr = Builder.CreateInBoundsGEP(OutBasePtr, OutOffset);
88702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
88802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    }
889bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
890083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (InBufPtr) {
      // Byte offset = (IV - Arg_x1) * InStep, applied to the input base.
891ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
892ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      InOffset = Builder.CreateMul(InOffset, InStep);
893083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      InPtr = Builder.CreateInBoundsGEP(InBufPtr, InOffset);
894ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      InPtr = Builder.CreatePointerCast(InPtr, InTy);
895ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    }
89602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser
    // Arguments are appended in the same order the parameters were consumed
    // above: in, out, usrData. Absent ones are simply skipped.
897ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    if (InPtr) {
8987ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      RootArgs.push_back(InPtr);
899db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
900db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
90102f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    if (OutPtr) {
9027ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      RootArgs.push_back(OutPtr);
903db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
904db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
905db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (UsrData) {
906db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      RootArgs.push_back(UsrData);
907db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
908db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
    // NOTE(review): finishArgList() is defined elsewhere; presumably it
    // appends the special arguments collected in CalleeArgs to RootArgs --
    // confirm against its definition.
90928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
910db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
    // Emit the call to the original function; its result is not used here.
911bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    Builder.CreateCall(Function, RootArgs);
912db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
9137ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    return true;
9147ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines  }
9157ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
9164e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  /* Expand a pass-by-value foreach kernel.
9177ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines   */
9184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  bool ExpandForEach(llvm::Function *Function, uint32_t Signature) {
919d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
920bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
9217ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
9224e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // TODO: Refactor this to share functionality with ExpandOldStyleForEach.
923bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::DataLayout DL(Module);
92497e50993c70083fdedb4f1dd2c487aa55c6f60cfDavid Gross    if (!mStructExplicitlyPaddedBySlang && (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING)) {
925f039d98d346006442b61255a2889b8513a8cd56fYong Chen      DL.reset(X86_CUSTOM_DL_STRING);
926f039d98d346006442b61255a2889b8513a8cd56fYong Chen    }
927f039d98d346006442b61255a2889b8513a8cd56fYong Chen    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
9287ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
929bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
9304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      createEmptyExpandedForEachKernel(Function->getName());
9317ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
932bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    /*
933bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * Extract the expanded function's parameters.  It is guaranteed by
934e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross     * createEmptyExpandedForEachKernel that there will be four parameters.
935bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     */
936881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
9374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
938881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
939bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator ExpandedFunctionArgIter =
940bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      ExpandedFunction->arg_begin();
941bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
942bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
943bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
944bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
9453bc475b206c3fa249a212b90fe989fdcda4d75f9Matt Wala    // Arg_outstep is not used by expanded new-style forEach kernels.
9467ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
9477ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // Construct the actual function body.
948f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar    llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin());
9497ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
95018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Create TBAA meta-data.
951354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
952354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines                 *TBAAAllocation, *TBAAPointer;
953bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDBuilder MDHelper(*Context);
95414588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien
955354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScriptDistinct =
9564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      MDHelper.createTBAARoot(kRenderScriptTBAARootName);
9574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
958354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines        TBAARenderScriptDistinct);
959e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
960e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                       TBAARenderScript);
961e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
962e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                      TBAAAllocation, 0);
963e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
964e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                    TBAARenderScript);
96514588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
96618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
967881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    /*
968881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     * Collect and construct the arguments for the kernel().
969881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     *
970881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     * Note that we load any loop-invariant arguments before entering the Loop.
971881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     */
972083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    size_t NumRemainingInputs = Function->arg_size();
9737ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
974881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // No usrData parameter on kernels.
975881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    bccAssert(
976881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
977881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
978881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    llvm::Function::arg_iterator ArgIter = Function->arg_begin();
979881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
980881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Check the return type
981bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Type     *OutTy            = nullptr;
982bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::LoadInst *OutBasePtr       = nullptr;
983bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Value    *CastedOutBasePtr = nullptr;
984881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
985e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    bool PassOutByPointer = false;
986881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
987d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
988bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *OutBaseTy = Function->getReturnType();
989881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
99074a4b08235990916911b8fe758d656c1171faf26Stephen Hines      if (OutBaseTy->isVoidTy()) {
991e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes        PassOutByPointer = true;
992881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        OutTy = ArgIter->getType();
993881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
994881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        ArgIter++;
995083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        --NumRemainingInputs;
99674a4b08235990916911b8fe758d656c1171faf26Stephen Hines      } else {
99774a4b08235990916911b8fe758d656c1171faf26Stephen Hines        // We don't increment Args, since we are using the actual return type.
998881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        OutTy = OutBaseTy->getPointerTo();
99974a4b08235990916911b8fe758d656c1171faf26Stephen Hines      }
1000881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1001083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
1002083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
1003097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
10049c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      if (gEnableRsTbaa) {
10059c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines        OutBasePtr->setMetadata("tbaa", TBAAPointer);
10069c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
100750f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray
100897e50993c70083fdedb4f1dd2c487aa55c6f60cfDavid Gross      if (mStructExplicitlyPaddedBySlang || (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING)) {
1009f039d98d346006442b61255a2889b8513a8cd56fYong Chen        CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
1010f039d98d346006442b61255a2889b8513a8cd56fYong Chen      } else {
1011f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // The disagreement between module and x86 target machine datalayout
1012f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // causes mismatched input/output data offset between slang reflected
1013f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // code and bcc codegen for GetElementPtr. To solve this issue, skip the
1014f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // cast to OutTy and leave CastedOutBasePtr as an int8_t*.  The buffer
1015f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // is later indexed with an explicit byte offset computed based on
101697e50993c70083fdedb4f1dd2c487aa55c6f60cfDavid Gross        // X86_CUSTOM_DL_STRING and then bitcast to actual output type.
1017f039d98d346006442b61255a2889b8513a8cd56fYong Chen        CastedOutBasePtr = OutBasePtr;
1018f039d98d346006442b61255a2889b8513a8cd56fYong Chen      }
101974a4b08235990916911b8fe758d656c1171faf26Stephen Hines    }
102074a4b08235990916911b8fe758d656c1171faf26Stephen Hines
1021f039d98d346006442b61255a2889b8513a8cd56fYong Chen    llvm::SmallVector<llvm::Type*,  8> InTypes;
1022083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
1023d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala    llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
1024881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1025083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    bccAssert(NumRemainingInputs <= RS_KERNEL_INPUT_LIMIT);
1026881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1027083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // Create the loop structure.
1028083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
10294165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    llvm::Value *IV;
1030083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    createLoop(Builder, Arg_x1, Arg_x2, &IV);
1031881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1032083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
1033083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    const int CalleeArgsContextIdx =
1034083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
1035083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             [&NumRemainingInputs]() { --NumRemainingInputs; },
1036083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             LoopHeader->getTerminator());
1037083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
1038083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // After ExpandSpecialArguments() gets called, NumRemainingInputs
1039083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // counts the number of arguments to the kernel that correspond to
1040083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // an array entry from the InPtr field of the DriverInfo
1041083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // structure.
1042083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    const size_t NumInPtrArguments = NumRemainingInputs;
1043083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
1044083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (NumInPtrArguments > 0) {
1045e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, ArgIter, NumInPtrArguments,
1046f039d98d346006442b61255a2889b8513a8cd56fYong Chen                                InTypes, InBufPtrs, InStructTempSlots);
1047881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    }
10487ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
10497ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // Populate the actual call to kernel().
10507ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    llvm::SmallVector<llvm::Value*, 8> RootArgs;
10517ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
10529296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala    // Calculate the current input and output pointers.
1053881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1054881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Output
1055881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1056900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutPtr = nullptr;
1057bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    if (CastedOutBasePtr) {
10587b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
1059f039d98d346006442b61255a2889b8513a8cd56fYong Chen
106097e50993c70083fdedb4f1dd2c487aa55c6f60cfDavid Gross      if (mStructExplicitlyPaddedBySlang || (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING)) {
1061f039d98d346006442b61255a2889b8513a8cd56fYong Chen        OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset);
1062f039d98d346006442b61255a2889b8513a8cd56fYong Chen      } else {
1063f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // Treat x86 output buffer as byte[], get indexed pointer with explicit
1064f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // byte offset computed using a datalayout based on
1065f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // X86_CUSTOM_DL_STRING, then bitcast it to actual output type.
1066f039d98d346006442b61255a2889b8513a8cd56fYong Chen        uint64_t OutStep = DL.getTypeAllocSize(OutTy->getPointerElementType());
1067f039d98d346006442b61255a2889b8513a8cd56fYong Chen        llvm::Value *OutOffsetInBytes = Builder.CreateMul(OutOffset, llvm::ConstantInt::get(Int32Ty, OutStep));
1068f039d98d346006442b61255a2889b8513a8cd56fYong Chen        OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffsetInBytes);
1069f039d98d346006442b61255a2889b8513a8cd56fYong Chen        OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
1070f039d98d346006442b61255a2889b8513a8cd56fYong Chen      }
1071bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
1072e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      if (PassOutByPointer) {
1073881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        RootArgs.push_back(OutPtr);
1074881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes      }
10754102bec56151fb5d9c962fb298412f34a6eacaa8Tobias Grosser    }
10767b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser
1077881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Inputs
107874a4b08235990916911b8fe758d656c1171faf26Stephen Hines
1079083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (NumInPtrArguments > 0) {
1080e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInPtrArguments,
1081f039d98d346006442b61255a2889b8513a8cd56fYong Chen                       InTypes, InBufPtrs, InStructTempSlots, IV, RootArgs);
10827ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
10837ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
108428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
10857ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1086bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
10877ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1088e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    if (OutPtr && !PassOutByPointer) {
10899296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala      RetVal->setName("call.result");
109018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
10919c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      if (gEnableRsTbaa) {
10929c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines        Store->setMetadata("tbaa", TBAAAllocation);
10939c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
10947ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
10957ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1096db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return true;
1097db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1098db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1099e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Certain categories of functions that make up a general
1100e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // reduce-style kernel are called directly from the driver with no
1101e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // expansion needed.  For a function in such a category, we need to
1102e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // promote linkage from static to external, to ensure that the
1103e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // function is visible to the driver in the dynamic symbol table.
1104e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // This promotion is safe because we don't have any kind of cross
1105e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // translation unit linkage model (except for linking against
1106e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // RenderScript libraries), so we do not risk name clashes.
11079fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross  bool PromoteReduceFunction(const char *Name, FunctionSet &PromotedFunctions) {
1108e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    if (!Name)  // a presumably-optional function that is not present
1109e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      return false;
1110e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1111e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function *Fn = Module->getFunction(Name);
1112e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    bccAssert(Fn != nullptr);
1113e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    if (PromotedFunctions.insert(Fn).second) {
1114e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      bccAssert(Fn->getLinkage() == llvm::GlobalValue::InternalLinkage);
1115e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      Fn->setLinkage(llvm::GlobalValue::ExternalLinkage);
1116e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      return true;
1117e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
1118e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1119e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    return false;
1120e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
1121e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1122e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Expand the accumulator function for a general reduce-style kernel.
1123e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1124e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // The input is a function of the form
1125e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1126e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   define void @func(accumType* %accum, foo1 in1[, ... fooN inN] [, special arguments])
1127e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1128e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // where all arguments except the first are the same as for a foreach kernel.
1129e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1130e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // The input accumulator function gets expanded into a function of the form
1131e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1132e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   define void @func.expand(%RsExpandKernelDriverInfoPfx* %p, i32 %x1, i32 %x2, accumType* %accum)
1133e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1134e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // which performs a serial accumulation of elements [x1, x2) into *%accum.
1135e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1136e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // In pseudocode, @func.expand does:
1137e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1138e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   for (i = %x1; i < %x2; ++i) {
1139e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //     func(%accum,
1140e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //          *((foo1 *)p->inPtr[0] + i)[, ... *((fooN *)p->inPtr[N-1] + i)
1141e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //          [, p] [, i] [, p->current.y] [, p->current.z]);
1142e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   }
1143e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1144e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // This is very similar to foreach kernel expansion with no output.
11459fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross  bool ExpandReduceAccumulator(llvm::Function *FnAccumulator, uint32_t Signature, size_t NumInputs) {
1146e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    ALOGV("Expanding accumulator %s for general reduce kernel",
1147e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross          FnAccumulator->getName().str().c_str());
1148e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1149e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Create TBAA meta-data.
1150e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
1151e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                 *TBAAAllocation, *TBAAPointer;
1152e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::MDBuilder MDHelper(*Context);
1153e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAARenderScriptDistinct =
1154e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      MDHelper.createTBAARoot(kRenderScriptTBAARootName);
1155e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
1156e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        TBAARenderScriptDistinct);
1157e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
1158e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                       TBAARenderScript);
1159e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
1160e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                      TBAAAllocation, 0);
1161e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
1162e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                    TBAARenderScript);
1163e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
1164e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1165e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    auto AccumulatorArgIter = FnAccumulator->arg_begin();
1166e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1167e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Create empty accumulator function.
1168e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function *FnExpandedAccumulator =
11699fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross        createEmptyExpandedReduceAccumulator(FnAccumulator->getName(),
11709fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross                                             (AccumulatorArgIter++)->getType());
1171e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1172e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Extract the expanded accumulator's parameters.  It is
11739fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross    // guaranteed by createEmptyExpandedReduceAccumulator that
1174e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // there will be 4 parameters.
11759fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross    bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams);
1176e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    auto ExpandedAccumulatorArgIter = FnExpandedAccumulator->arg_begin();
1177e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_p     = &*(ExpandedAccumulatorArgIter++);
1178e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_x1    = &*(ExpandedAccumulatorArgIter++);
1179e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_x2    = &*(ExpandedAccumulatorArgIter++);
1180e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_accum = &*(ExpandedAccumulatorArgIter++);
1181e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1182e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Construct the actual function body.
1183f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar    llvm::IRBuilder<> Builder(&*FnExpandedAccumulator->getEntryBlock().begin());
1184e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1185e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Create the loop structure.
1186e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
11874165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    llvm::Value *IndVar;
1188e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    createLoop(Builder, Arg_x1, Arg_x2, &IndVar);
1189e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1190e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
1191e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    const int CalleeArgsContextIdx =
1192e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        ExpandSpecialArguments(Signature, IndVar, Arg_p, Builder, CalleeArgs,
1193e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                               [](){}, LoopHeader->getTerminator());
1194e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1195f039d98d346006442b61255a2889b8513a8cd56fYong Chen    llvm::SmallVector<llvm::Type*,  8> InTypes;
1196e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
1197e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
1198e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, AccumulatorArgIter, NumInputs,
1199f039d98d346006442b61255a2889b8513a8cd56fYong Chen                              InTypes, InBufPtrs, InStructTempSlots);
1200e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1201e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Populate the actual call to the original accumulator.
1202e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> RootArgs;
1203e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    RootArgs.push_back(Arg_accum);
1204f039d98d346006442b61255a2889b8513a8cd56fYong Chen    ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInputs, InTypes, InBufPtrs, InStructTempSlots,
1205e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                     IndVar, RootArgs);
1206e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *FnAccumulator, Builder);
1207e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.CreateCall(FnAccumulator, RootArgs);
1208e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1209e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    return true;
1210e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
1211e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1212dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  // Create a combiner function for a general reduce-style kernel that lacks one,
1213dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  // by calling the accumulator function.
1214dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //
1215dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  // The accumulator function must be of the form
1216dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //
1217dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //   define void @accumFn(accumType* %accum, accumType %in)
1218dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //
1219dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  // A combiner function will be generated of the form
1220dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //
1221dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //   define void @accumFn.combiner(accumType* %accum, accumType* %other) {
1222dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //     %1 = load accumType, accumType* %other
1223dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //     call void @accumFn(accumType* %accum, accumType %1);
1224dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //   }
12259fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross  bool CreateReduceCombinerFromAccumulator(llvm::Function *FnAccumulator) {
1226dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    ALOGV("Creating combiner from accumulator %s for general reduce kernel",
1227dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross          FnAccumulator->getName().str().c_str());
1228dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1229dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    using llvm::Attribute;
1230dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1231dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    bccAssert(FnAccumulator->arg_size() == 2);
1232dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    auto AccumulatorArgIter = FnAccumulator->arg_begin();
1233dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::Value *AccumulatorArg_accum = &*(AccumulatorArgIter++);
1234dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::Value *AccumulatorArg_in    = &*(AccumulatorArgIter++);
1235dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::Type *AccumulatorArgType = AccumulatorArg_accum->getType();
1236dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    bccAssert(AccumulatorArgType->isPointerTy());
1237dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1238dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
1239dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::FunctionType *CombinerType =
1240dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross        llvm::FunctionType::get(VoidTy, { AccumulatorArgType, AccumulatorArgType }, false);
1241dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::Function *FnCombiner =
1242dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross        llvm::Function::Create(CombinerType, llvm::GlobalValue::ExternalLinkage,
12439fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross                               nameReduceCombinerFromAccumulator(FnAccumulator->getName()),
1244dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross                               Module);
1245dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1246dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    auto CombinerArgIter = FnCombiner->arg_begin();
1247dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1248dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::Argument *CombinerArg_accum = &(*CombinerArgIter++);
1249dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    CombinerArg_accum->setName("accum");
1250dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    CombinerArg_accum->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_accum->getArgNo() + 1,
1251dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross                                                       llvm::makeArrayRef(Attribute::NoCapture)));
1252dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1253dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::Argument *CombinerArg_other = &(*CombinerArgIter++);
1254dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    CombinerArg_other->setName("other");
1255dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    CombinerArg_other->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_other->getArgNo() + 1,
1256dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross                                                       llvm::makeArrayRef(Attribute::NoCapture)));
1257dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1258dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::BasicBlock *BB = llvm::BasicBlock::Create(*Context, "BB", FnCombiner);
1259dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::IRBuilder<> Builder(BB);
1260dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1261dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    if (AccumulatorArg_in->getType()->isPointerTy()) {
1262dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      // Types of sufficient size get passed by pointer-to-copy rather
1263dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      // than passed by value.  An accumulator cannot take a pointer
1264dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      // at the user level; so if we see a pointer here, we know that
1265dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      // we have a pass-by-pointer-to-copy case.
1266dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      llvm::Type *ElementType = AccumulatorArg_in->getType()->getPointerElementType();
1267dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      llvm::Value *TempMem = Builder.CreateAlloca(ElementType, nullptr, "caller_copy");
1268dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      Builder.CreateStore(Builder.CreateLoad(CombinerArg_other), TempMem);
1269dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      Builder.CreateCall(FnAccumulator, { CombinerArg_accum, TempMem });
1270dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    } else {
1271dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      llvm::Value *TypeAdjustedOther = CombinerArg_other;
1272dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      if (AccumulatorArgType->getPointerElementType() != AccumulatorArg_in->getType()) {
1273dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross        // Call lowering by frontend has done some type coercion
1274dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross        TypeAdjustedOther = Builder.CreatePointerCast(CombinerArg_other,
1275dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross                                                      AccumulatorArg_in->getType()->getPointerTo(),
1276dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross                                                      "cast");
1277dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      }
1278dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      llvm::Value *DerefOther = Builder.CreateLoad(TypeAdjustedOther);
1279dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      Builder.CreateCall(FnAccumulator, { CombinerArg_accum, DerefOther });
1280dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    }
1281dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    Builder.CreateRetVoid();
1282dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1283dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    return true;
1284dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  }
1285dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
128618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// @brief Checks if pointers to allocation internals are exposed
128718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  ///
128818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// This function verifies if through the parameters passed to the kernel
128918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// or through calls to the runtime library the script gains access to
129018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// pointers pointing to data within a RenderScript Allocation.
129118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// If we know we control all loads from and stores to data within
129218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// RenderScript allocations and if we know the run-time internal accesses
129318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// are all annotated with RenderScript TBAA metadata, only then we
129418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// can safely use TBAA to distinguish between generic and from-allocation
129518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// pointers.
1296bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  bool allocPointersExposed(llvm::Module &Module) {
129718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Old style kernel function can expose pointers to elements within
129818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // allocations.
129918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // TODO: Extend analysis to allow simple cases of old-style kernels.
130025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    for (size_t i = 0; i < mExportForEachCount; ++i) {
130125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      const char *Name = mExportForEachNameList[i];
130225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      uint32_t Signature = mExportForEachSignatureList[i];
1303bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (Module.getFunction(Name) &&
1304d88177580db4ddedf680854c51db333c97eabc59Stephen Hines          !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
130518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
130618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
130718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
130818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
130918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Check for library functions that expose a pointer to an Allocation or
131018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // that are not yet annotated with RenderScript-specific tbaa information.
1311e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    static const std::vector<const char *> Funcs{
1312e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAt(...)
1313e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationj",
1314e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationjj",
1315e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationjjj",
1316e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1317e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsSetElementAt()
1318e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvj",
1319e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvjj",
1320e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvjjj",
1321e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1322e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_Y()
1323e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj",
1324e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1325e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_U()
1326e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj",
1327e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1328e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_V()
1329e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj",
1330e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    };
1331e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1332e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    for (auto FI : Funcs) {
1333e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      llvm::Function *Function = Module.getFunction(FI);
133418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1335bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (!Function) {
1336e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala        ALOGE("Missing run-time function '%s'", FI);
133718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
133818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
133918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1340bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (Function->getNumUses() > 0) {
134118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
134218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
134318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
134418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
134518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    return false;
134618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  }
134718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
134818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
134918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  ///
135018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// The TBAA metadata used to annotate loads/stores from RenderScript
1351e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// Allocations is generated in a separate TBAA tree with a
1352354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for
1353354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// all nodes in unrelated alias analysis trees. This function makes the
1354354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root),
1355e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With
1356e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// the connected trees every access to an Allocation is resolved to
1357e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// must-alias if compared to a normal C/C++ access.
1358bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
1359bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDBuilder MDHelper(*Context);
1360354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScriptDistinct =
1361354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines      MDHelper.createTBAARoot("RenderScript Distinct TBAA");
1362354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode(
1363354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines        "RenderScript TBAA", TBAARenderScriptDistinct);
1364bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
1365354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScript->replaceOperandWith(1, TBAARoot);
136618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  }
136718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1368bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  virtual bool runOnModule(llvm::Module &Module) {
1369bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bool Changed  = false;
1370bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    this->Module  = &Module;
13714e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    Context = &Module.getContext();
1372bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
13734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    buildTypes();
1374bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
1375bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bcinfo::MetadataExtractor me(&Module);
137625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    if (!me.extract()) {
137725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      ALOGE("Could not extract metadata from module!");
137825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      return false;
137925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    }
13804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
138197e50993c70083fdedb4f1dd2c487aa55c6f60cfDavid Gross    mStructExplicitlyPaddedBySlang = (me.getCompilerVersion() >= SlangVersion::N_STRUCT_EXPLICIT_PADDING);
138297e50993c70083fdedb4f1dd2c487aa55c6f60cfDavid Gross
13834e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Expand forEach_* style kernels.
138425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachCount = me.getExportForEachSignatureCount();
138525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachNameList = me.getExportForEachNameList();
138625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachSignatureList = me.getExportForEachSignatureList();
1387db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
138825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    for (size_t i = 0; i < mExportForEachCount; ++i) {
138925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      const char *name = mExportForEachNameList[i];
139025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      uint32_t signature = mExportForEachSignatureList[i];
1391bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Function *kernel = Module.getFunction(name);
1392cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser      if (kernel) {
1393d88177580db4ddedf680854c51db333c97eabc59Stephen Hines        if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
13944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala          Changed |= ExpandForEach(kernel, signature);
1395acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1396acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        } else if (kernel->getReturnType()->isVoidTy()) {
13974e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala          Changed |= ExpandOldStyleForEach(kernel, signature);
1398acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1399acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        } else {
1400acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // There are some graphics root functions that are not
1401acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // expanded, but that will be called directly. For those
1402acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // functions, we can not set the linkage to internal.
1403acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        }
1404cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines      }
1405db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
1406db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1407e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Process general reduce_* style functions.
14089fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross    const size_t ExportReduceCount = me.getExportReduceCount();
14099fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross    const bcinfo::MetadataExtractor::Reduce *ExportReduceList = me.getExportReduceList();
1410e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    //   Note that functions can be shared between kernels
1411dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    FunctionSet PromotedFunctions, ExpandedAccumulators, AccumulatorsForCombiners;
1412e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
14139fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross    for (size_t i = 0; i < ExportReduceCount; ++i) {
14149fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross      Changed |= PromoteReduceFunction(ExportReduceList[i].mInitializerName, PromotedFunctions);
14159fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross      Changed |= PromoteReduceFunction(ExportReduceList[i].mCombinerName, PromotedFunctions);
14169fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross      Changed |= PromoteReduceFunction(ExportReduceList[i].mOutConverterName, PromotedFunctions);
1417e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1418e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      // Accumulator
14199fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross      llvm::Function *accumulator = Module.getFunction(ExportReduceList[i].mAccumulatorName);
1420e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      bccAssert(accumulator != nullptr);
1421e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (ExpandedAccumulators.insert(accumulator).second)
14229fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross        Changed |= ExpandReduceAccumulator(accumulator,
14239fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross                                           ExportReduceList[i].mSignature,
14249fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross                                           ExportReduceList[i].mInputCount);
14259fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross      if (!ExportReduceList[i].mCombinerName) {
1426dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross        if (AccumulatorsForCombiners.insert(accumulator).second)
14279fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross          Changed |= CreateReduceCombinerFromAccumulator(accumulator);
1428dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      }
1429e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
1430e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
14314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (gEnableRsTbaa && !allocPointersExposed(Module)) {
1432bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      connectRenderScriptTBAAMetadata(Module);
143318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
143418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1435cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    return Changed;
1436db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1437db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1438db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  virtual const char *getPassName() const {
14394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    return "forEach_* and reduce_* function expansion";
1440db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1441db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
14424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala}; // end RSKernelExpandPass
1443db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
14447a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace
14457a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
14464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walachar RSKernelExpandPass::ID = 0;
14474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walastatic llvm::RegisterPass<RSKernelExpandPass> X("kernelexp", "Kernel Expand Pass");
1448db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1449db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace bcc {
1450db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
14514165d29822fc7caf81e435995ff6189608fc0323Dean De Leoconst char BCC_INDEX_VAR_NAME[] = "rsIndex";
14524165d29822fc7caf81e435995ff6189608fc0323Dean De Leo
14537a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass *
14544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt WalacreateRSKernelExpandPass(bool pEnableStepOpt) {
14554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  return new RSKernelExpandPass(pEnableStepOpt);
14567a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao}
1457db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
14587a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc
1459