RSKernelExpand.cpp revision a2dd52f0710c214e00c1a13e25116e1af5eec77a
1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/*
2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project
3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License");
5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License.
6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at
7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *     http://www.apache.org/licenses/LICENSE-2.0
9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software
11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS,
12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and
14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License.
15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */
16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
17a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "Assert.h"
18a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "Log.h"
19a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "RSTransforms.h"
20a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "RSUtils.h"
21a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet
22a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "bcc/Config.h"
23a2dd52f0710c214e00c1a13e25116e1af5eec77aJean-Luc Brouillet#include "bcinfo/MetadataExtractor.h"
247a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
257a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib>
2633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross#include <functional>
27e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross#include <unordered_set>
287a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
29b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DerivedTypes.h>
30b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Function.h>
31b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Instructions.h>
32b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/IRBuilder.h>
3318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser#include <llvm/IR/MDBuilder.h>
34b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Module.h>
35c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h>
367ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines#include <llvm/Support/raw_ostream.h>
37b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DataLayout.h>
38cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser#include <llvm/IR/Function.h>
39b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Type.h>
40806075b3a54af826fea78490fb213d8a0784138eTobias Grosser#include <llvm/Transforms/Utils/BasicBlockUtils.h>
41c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang
424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala#ifndef __DISABLE_ASSERTS
434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala// Only used in bccAssert()
444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst int kNumExpandedForeachParams = 4;
459fa4d4480252ecfe08c97bc35888360b1e19ec99David Grossconst int kNumExpandedReduceAccumulatorParams = 4;
464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala#endif
474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst char kRenderScriptTBAARootName[] = "RenderScript Distinct TBAA";
494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaconst char kRenderScriptTBAANodeName[] = "RenderScript TBAA";
50bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
517a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc;
527a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
53db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace {
547a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
55354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hinesstatic const bool gEnableRsTbaa = true;
569c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines
57797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross/* RSKernelExpandPass
58797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross *
59797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * This pass generates functions used to implement calls via
60797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * rsForEach(), "foreach_<NAME>", or "reduce_<NAME>". We create an
61797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * inner loop for the function to be invoked over the appropriate data
62797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * cells of the input/output allocations (adjusting other relevant
63797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * parameters as we go). We support doing this for any forEach or
64797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * reduce style compute kernels.
65797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross *
66797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * In the case of a foreach kernel or a simple reduction kernel, the
67797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * new function name is the original function name "<NAME>" followed
68797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * by ".expand" -- "<NAME>.expand".
69797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross *
70797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * In the case of a general reduction kernel, the kernel's accumulator
71797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * function is the one transformed, and the new function name is the
72797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * original accumulator function name "<ACCUMFN>" followed by
73797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * ".expand" -- "<ACCUMFN>.expand". Using the name "<ACCUMFN>.expand"
74797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * for the function generated from the accumulator should not
75797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * introduce any possibility for name clashes today: The accumulator
76797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * function <ACCUMFN> must be static, so it cannot also serve as a
77797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * foreach kernel; and the code for <ACCUMFN>.expand depends only on
78797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * <ACCUMFN>, not on any other properties of the reduction kernel, so
79797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * any reduction kernels that share the accumulator <ACCUMFN> can
80797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * share <ACCUMFN>.expand also.
81797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross *
82797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * Note that this pass does not delete the original function <NAME> or
83797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * <ACCUMFN>. However, if it is inlined into the newly-generated
84797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * function and not otherwise referenced, then a subsequent pass may
85797b51672914cbe3f3b75e99df7ee7c25560dab7David Gross * delete it.
867a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */
874e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaclass RSKernelExpandPass : public llvm::ModulePass {
8833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grosspublic:
89db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  static char ID;
90db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
9133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossprivate:
92e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h
93e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
94e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  typedef std::unordered_set<llvm::Function *> FunctionSet;
95e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
96e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  enum RsLaunchDimensionsField {
97e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldX,
98e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldY,
99e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldZ,
100e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldLod,
101e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldFace,
102e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldArray,
103e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
104e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsFieldCount
105e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  };
106e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
107e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  enum RsExpandKernelDriverInfoPfxField {
108e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldInPtr,
109e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldInStride,
110e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldInLen,
111e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldOutPtr,
112e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldOutStride,
113e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldOutLen,
114e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldDim,
115e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldCurrent,
116e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldUsr,
117e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldUsLenr,
118e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
119e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxFieldCount
120e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  };
12133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
122bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::Module *Module;
123bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::LLVMContext *Context;
124bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
125bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  /*
1264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala   * Pointers to LLVM type information for the function signatures
1274e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala   * for expanded functions. These must be re-calculated for each module
1284e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala   * the pass is run on.
129bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes   */
1309fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross  llvm::FunctionType *ExpandedForEachType;
131e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  llvm::Type *RsExpandKernelDriverInfoPfxTy;
132db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
13325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  uint32_t mExportForEachCount;
13425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  const char **mExportForEachNameList;
13525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  const uint32_t *mExportForEachSignatureList;
136cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines
1372b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // Turns on optimization of allocation stride values.
1382b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  bool mEnableStepOpt;
1392b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
140bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  uint32_t getRootSignature(llvm::Function *Function) {
141db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    const llvm::NamedMDNode *ExportForEachMetadata =
142bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes        Module->getNamedMetadata("#rs_export_foreach");
143db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
144db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (!ExportForEachMetadata) {
145db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
146bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      for (llvm::Function::arg_iterator B = Function->arg_begin(),
147bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                                        E = Function->arg_end();
148db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           B != E;
149db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           ++B) {
150db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        RootArgTys.push_back(B->getType());
151db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
152db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
153db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // For pre-ICS bitcode, we may not have signature information. In that
154db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // case, we use the size of the RootArgTys to select the number of
155db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // arguments.
156db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      return (1 << RootArgTys.size()) - 1;
157db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
158db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1597ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    if (ExportForEachMetadata->getNumOperands() == 0) {
1607ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      return 0;
1617ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
1627ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1636e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
164db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
165cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // We only handle the case for legacy root() functions here, so this is
166cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // hard-coded to look at only the first such function.
167db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
168900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    if (SigNode != nullptr && SigNode->getNumOperands() == 1) {
1691bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines      llvm::Metadata *SigMD = SigNode->getOperand(0);
1701bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines      if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) {
1711bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines        llvm::StringRef SigString = SigS->getString();
172db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        uint32_t Signature = 0;
173db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        if (SigString.getAsInteger(10, Signature)) {
174db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
175db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          return 0;
176db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        }
177db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        return Signature;
178db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
179db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
180db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
181db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return 0;
182db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
183db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
184429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray  bool isStepOptSupported(llvm::Type *AllocType) {
185429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
186429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
187429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
188429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
189429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (mEnableStepOpt) {
190429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
191429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
192429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
193429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType == VoidPtrTy) {
194429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
195429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
196429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
197429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (!PT) {
198429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
199429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
200429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
201429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // remaining conditions are 64-bit only
202429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (VoidPtrTy->getPrimitiveSizeInBits() == 32) {
203429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return true;
204429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
205429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
206429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // coerce suggests an upconverted struct type, which we can't support
207429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) {
208429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
209429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
210429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
211429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported
212429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2);
213429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128);
214429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType == V2xi64Ty || AllocType == Int128Ty) {
215429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
216429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
217429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
218429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    return true;
219429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray  }
220429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
2212b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // Get the actual value we should use to step through an allocation.
2227b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  //
2237b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // Normally the value we use to step through an allocation is given to us by
2247b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // the driver. However, for certain primitive data types, we can derive an
2257b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // integer constant for the step value. We use this integer constant whenever
2267b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // possible to allow further compiler optimizations to take place.
2277b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  //
228b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines  // DL - Target Data size/layout information.
2292b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // T - Type of allocation (should be a pointer).
2302b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // OrigStep - Original step increment (root.expand() input from driver).
231bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
2322b04086acbef6520ae2c54a868b1271abf053122Stephen Hines                            llvm::Value *OrigStep) {
233b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines    bccAssert(DL);
234bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(AllocType);
2352b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    bccAssert(OrigStep);
236bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
237429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (isStepOptSupported(AllocType)) {
2382b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      llvm::Type *ET = PT->getElementType();
239b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      uint64_t ETSize = DL->getTypeAllocSize(ET);
240bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
2412b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return llvm::ConstantInt::get(Int32Ty, ETSize);
2422b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    } else {
2432b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return OrigStep;
2442b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    }
2452b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  }
2462b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
247097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes  /// Builds the types required by the pass for the given context.
248bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  void buildTypes(void) {
249e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs.
250bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
251e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8Ty                   = llvm::Type::getInt8Ty(*Context);
252e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8PtrTy                = Int8Ty->getPointerTo();
253e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT);
254e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32Ty                  = llvm::Type::getInt32Ty(*Context);
255e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32ArrayInputLimitTy   = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT);
256e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *VoidPtrTy                = llvm::Type::getInt8PtrTy(*Context);
257e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32Array4Ty            = llvm::ArrayType::get(Int32Ty, 4);
258097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
259097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes    /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h:
260db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *
261e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * struct RsLaunchDimensions {
262e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t x;
263db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t y;
264db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t z;
265e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t lod;
266e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t face;
267e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t array[4];
268e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * };
269e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     */
270e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes;
271e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t x
272e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t y
273e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t z
274e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t lod
275e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t face
276e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4]
277e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::StructType *RsLaunchDimensionsTy =
278e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions");
279e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
2801d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross    /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h:
281e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
282e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * struct RsExpandKernelDriverInfoPfx {
283e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT];
284e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t inStride[RS_KERNEL_INPUT_LIMIT];
285e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t inLen;
286e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
287e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT];
288e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t outStride[RS_KERNEL_INPUT_LIMIT];
289e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t outLen;
290e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
291e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // Dimension of the launch
292e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     RsLaunchDimensions dim;
293e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
294e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // The walking iterator of the launch
295e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     RsLaunchDimensions current;
296e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
297e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     const void *usr;
298e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t usrLen;
299e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
300e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // Items below this line are not used by the compiler and can be change in the driver.
301e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // So the compiler must assume there are an unknown number of fields of unknown type
302e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // beginning here.
303db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     * };
3041d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross     *
3051d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross     * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp).
306db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     */
307e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes;
308e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]
309e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t inStride[RS_KERNEL_INPUT_LIMIT]
310e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t inLen
311e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]
312e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t outStride[RS_KERNEL_INPUT_LIMIT]
313e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t outLen
314e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions dim
315e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions current
316e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy);                // const void *usr
317e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t usrLen
318e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    RsExpandKernelDriverInfoPfxTy =
319e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx");
320bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
321bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    // Create the function type for expanded kernels.
3224e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
323bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
324e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo();
3254e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // void (const RsExpandKernelDriverInfoPfxTy *p, uint32_t x1, uint32_t x2, uint32_t outstep)
3264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    ExpandedForEachType = llvm::FunctionType::get(VoidTy,
3274e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        {RsExpandKernelDriverInfoPfxPtrTy, Int32Ty, Int32Ty, Int32Ty}, false);
3288ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  }
3298ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
3304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  /// @brief Create skeleton of the expanded foreach kernel.
331357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///
332357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  /// This creates a function with the following signature:
333357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///
334357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
3355010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes  ///         uint32_t outstep)
336357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///
3374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  llvm::Function *createEmptyExpandedForEachKernel(llvm::StringRef OldName) {
338bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
3394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      llvm::Function::Create(ExpandedForEachType,
340bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                             llvm::GlobalValue::ExternalLinkage,
341bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                             OldName + ".expand", Module);
3424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
343bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
344bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("p");
345bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("x1");
346bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("x2");
347bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("arg_outstep");
3484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
3494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                                                       ExpandedFunction);
3504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::IRBuilder<> Builder(Begin);
3514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    Builder.CreateRetVoid();
3524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    return ExpandedFunction;
3534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  }
3544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
355e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Create skeleton of a general reduce kernel's expanded accumulator.
356e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
357e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // This creates a function with the following signature:
358e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
359e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //  void @func.expand(%RsExpandKernelDriverInfoPfx* nocapture %p,
360e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                    i32 %x1, i32 %x2, accumType* nocapture %accum)
361e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
3629fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross  llvm::Function *createEmptyExpandedReduceAccumulator(llvm::StringRef OldName,
3639fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross                                                       llvm::Type *AccumArgTy) {
364e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
365e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
3669fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross    llvm::FunctionType *ExpandedReduceAccumulatorType =
367e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        llvm::FunctionType::get(VoidTy,
368e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                {RsExpandKernelDriverInfoPfxTy->getPointerTo(),
369e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 Int32Ty, Int32Ty, AccumArgTy}, false);
370e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function *FnExpandedAccumulator =
3719fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross      llvm::Function::Create(ExpandedReduceAccumulatorType,
372e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                             llvm::GlobalValue::ExternalLinkage,
373e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                             OldName + ".expand", Module);
3749fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross    bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams);
375e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
376e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function::arg_iterator AI = FnExpandedAccumulator->arg_begin();
377e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
378e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    using llvm::Attribute;
379e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
380e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_p = &(*AI++);
381e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_p->setName("p");
382e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_p->addAttr(llvm::AttributeSet::get(*Context, Arg_p->getArgNo() + 1,
383e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                           llvm::makeArrayRef(Attribute::NoCapture)));
384e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
385e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_x1 = &(*AI++);
386e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_x1->setName("x1");
387e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
388e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_x2 = &(*AI++);
389e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_x2->setName("x2");
390e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
391e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Argument *Arg_accum = &(*AI++);
392e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_accum->setName("accum");
393e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Arg_accum->addAttr(llvm::AttributeSet::get(*Context, Arg_accum->getArgNo() + 1,
394e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                               llvm::makeArrayRef(Attribute::NoCapture)));
395e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
396e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
397e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                       FnExpandedAccumulator);
398e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::IRBuilder<> Builder(Begin);
399e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.CreateRetVoid();
400e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
401e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    return FnExpandedAccumulator;
402e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
403e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
404e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @brief Create an empty loop
405e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
406e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// Create a loop of the form:
407e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
408e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// for (i = LowerBound; i < UpperBound; i++)
409e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///   ;
410e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
411e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// After the loop has been created, the builder is set such that
412e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// instructions can be added to the loop body.
413e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
414e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param Builder The builder to use to build this loop. The current
415e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///                position of the builder is the position the loop
416e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///                will be inserted.
417e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param LowerBound The first value of the loop iterator
418e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param UpperBound The maximal value of the loop iterator
419e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param LoopIV A reference that will be set to the loop iterator.
420e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @return The BasicBlock that will be executed after the loop.
421e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
422e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::Value *LowerBound,
423e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::Value *UpperBound,
4244165d29822fc7caf81e435995ff6189608fc0323Dean De Leo                               llvm::Value **LoopIV) {
425c2ca742d7d0197c52e49467862844463fb42280fDavid Gross    bccAssert(LowerBound->getType() == UpperBound->getType());
426e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
427e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
4284165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    llvm::Value *Cond, *IVNext, *IV, *IVVar;
429e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
430e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    CondBB = Builder.GetInsertBlock();
431f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar    AfterBB = llvm::SplitBlock(CondBB, &*Builder.GetInsertPoint(), nullptr, nullptr);
432bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
433e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
4344165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    CondBB->getTerminator()->eraseFromParent();
4354165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    Builder.SetInsertPoint(CondBB);
4364165d29822fc7caf81e435995ff6189608fc0323Dean De Leo
4374165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    // decltype(LowerBound) *ivvar = alloca(sizeof(int))
4384165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    // *ivvar = LowerBound
4394165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    IVVar = Builder.CreateAlloca(LowerBound->getType(), nullptr, BCC_INDEX_VAR_NAME);
4404165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    Builder.CreateStore(LowerBound, IVVar);
4414165d29822fc7caf81e435995ff6189608fc0323Dean De Leo
442e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // if (LowerBound < Upperbound)
443e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto LoopHeader
444e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // else
445e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto AfterBB
446e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser    Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
447e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
448e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
4494165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    // LoopHeader:
4504165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //   iv = *ivvar
4514165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //   <insertion point here>
4524165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //   iv.next = iv + 1
4534165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //   *ivvar = iv.next
4544165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //   if (iv.next < Upperbound)
4554165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //     goto LoopHeader
4564165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //   else
4574165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    //     goto AfterBB
4584165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    // AfterBB:
459e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.SetInsertPoint(HeaderBB);
4604165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    IV = Builder.CreateLoad(IVVar, "X");
461e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
4624165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    Builder.CreateStore(IVNext, IVVar);
463e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser    Cond = Builder.CreateICmpULT(IVNext, UpperBound);
464e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
465e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    AfterBB->setName("Exit");
4664165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    Builder.SetInsertPoint(llvm::cast<llvm::Instruction>(IVNext));
4674165d29822fc7caf81e435995ff6189608fc0323Dean De Leo
4684165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    // Record information about this loop.
469e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    *LoopIV = IV;
470e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    return AfterBB;
471e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  }
472e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
47328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Finish building the outgoing argument list for calling a ForEach-able function.
47428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //
47528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // ArgVector - on input, the non-special arguments
47628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //             on output, the non-special arguments combined with the special arguments
47728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //               from SpecialArgVector
47828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // SpecialArgVector - special arguments (from ExpandSpecialArguments())
47928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // SpecialArgContextIdx - return value of ExpandSpecialArguments()
48028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //                          (position of context argument in SpecialArgVector)
48128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // CalleeFunction - the ForEach-able function being called
48228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Builder - for inserting code into the caller function
48328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  template<unsigned int ArgVectorLen, unsigned int SpecialArgVectorLen>
48428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  void finishArgList(      llvm::SmallVector<llvm::Value *, ArgVectorLen>        &ArgVector,
48528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const llvm::SmallVector<llvm::Value *, SpecialArgVectorLen> &SpecialArgVector,
48628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const int SpecialArgContextIdx,
48728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const llvm::Function &CalleeFunction,
48828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     llvm::IRBuilder<> &CallerBuilder) {
48928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    /* The context argument (if any) is a pointer to an opaque user-visible type that differs from
49028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * the RsExpandKernelDriverInfoPfx type used in the function we are generating (although the
49128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * two types represent the same thing).  Therefore, we must introduce a pointer cast when
49228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * generating a call to the kernel function.
49328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     */
49428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    const int ArgContextIdx =
49528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        SpecialArgContextIdx >= 0 ? (ArgVector.size() + SpecialArgContextIdx) : SpecialArgContextIdx;
49628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    ArgVector.append(SpecialArgVector.begin(), SpecialArgVector.end());
49728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    if (ArgContextIdx >= 0) {
49828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      llvm::Type *ContextArgType = nullptr;
49928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      int ArgIdx = ArgContextIdx;
50028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      for (const auto &Arg : CalleeFunction.getArgumentList()) {
50128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        if (!ArgIdx--) {
50228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross          ContextArgType = Arg.getType();
50328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross          break;
50428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        }
50528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      }
50628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      bccAssert(ContextArgType);
50728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      ArgVector[ArgContextIdx] = CallerBuilder.CreatePointerCast(ArgVector[ArgContextIdx], ContextArgType);
50828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    }
50928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  }
51028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
511083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // GEPHelper() returns a SmallVector of values suitable for passing
512083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // to IRBuilder::CreateGEP(), and SmallGEPIndices is a typedef for
513083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // the returned data type. It is sized so that the SmallVector
514083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // returned by GEPHelper() never needs to do a heap allocation for
515083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // any list of GEP indices it encounters in the code.
516083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  typedef llvm::SmallVector<llvm::Value *, 3> SmallGEPIndices;
517083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
518083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Helper for turning a list of constant integer GEP indices into a
519083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // SmallVector of llvm::Value*. The return value is suitable for
520083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // passing to a GetElementPtrInst constructor or IRBuilder::CreateGEP().
521083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //
522083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Inputs:
523083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   I32Args should be integers which represent the index arguments
524083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   to a GEP instruction.
525083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //
526083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Returns:
527083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   Returns a SmallVector of ConstantInts.
5284e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  SmallGEPIndices GEPHelper(const std::initializer_list<int32_t> I32Args) {
529083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    SmallGEPIndices Out(I32Args.size());
530083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::IntegerType *I32Ty = llvm::Type::getInt32Ty(*Context);
531083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    std::transform(I32Args.begin(), I32Args.end(), Out.begin(),
532083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                   [I32Ty](int32_t Arg) { return llvm::ConstantInt::get(I32Ty, Arg); });
533083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    return Out;
534083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  }
535083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
5368ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosserpublic:
5377e920a716693033edf32a6fedd03798bbfbd85ebChih-Hung Hsieh  explicit RSKernelExpandPass(bool pEnableStepOpt = true)
538900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes      : ModulePass(ID), Module(nullptr), Context(nullptr),
539bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes        mEnableStepOpt(pEnableStepOpt) {
540bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
5418ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  }
5428ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
543c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
544c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines    // This pass does not use any other analysis passes, but it does
545c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines    // add/wrap the existing functions in the module (thus altering the CFG).
546c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines  }
547c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines
54833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Build contribution to outgoing argument list for calling a
549e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // ForEach-able function or a general reduction accumulator
550e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // function, based on the special parameters of that function.
55133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  //
552e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Signature - metadata bits for the signature of the callee
55333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // X, Arg_p - values derived directly from expanded function,
554e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //            suitable for computing arguments for the callee
55533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // CalleeArgs - contribution is accumulated here
55633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Bump - invoked once for each contributed outgoing argument
557083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // LoopHeaderInsertionPoint - an Instruction in the loop header, before which
558083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //                            this function can insert loop-invariant loads
55928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //
56028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Return value is the (zero-based) position of the context (Arg_p)
56128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // argument in the CalleeArgs vector, or a negative value if the
56228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // context argument is not placed in the CalleeArgs vector.
56328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  int ExpandSpecialArguments(uint32_t Signature,
56428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::Value *X,
56528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::Value *Arg_p,
56628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::IRBuilder<> &Builder,
56728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::SmallVector<llvm::Value*, 8> &CalleeArgs,
5688a019dd0040bedf5078e4d18e06a244a675b80e8Chih-Hung Hsieh                             const std::function<void ()> &Bump,
569083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             llvm::Instruction *LoopHeaderInsertionPoint) {
57028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
57128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    bccAssert(CalleeArgs.empty());
57228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
57328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    int Return = -1;
57433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) {
57533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      CalleeArgs.push_back(Arg_p);
57633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      Bump();
57728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      Return = CalleeArgs.size() - 1;
57833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
57933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
58033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
58133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      CalleeArgs.push_back(X);
58233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      Bump();
58333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
58433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
585e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) ||
586e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
587083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      bccAssert(LoopHeaderInsertionPoint);
58833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
589083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // Y and Z are loop invariant, so they can be hoisted out of the
590083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // loop. Set the IRBuilder insertion point to the loop header.
591083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      auto OldInsertionPoint = Builder.saveIP();
592083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.SetInsertPoint(LoopHeaderInsertionPoint);
593e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
594e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
595083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices YValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
596083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          RsLaunchDimensionsFieldY}));
597083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *YAddr = Builder.CreateInBoundsGEP(Arg_p, YValueGEP, "Y.gep");
598083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        CalleeArgs.push_back(Builder.CreateLoad(YAddr, "Y"));
599e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        Bump();
600e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      }
601e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
602e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
603083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices ZValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
604083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          RsLaunchDimensionsFieldZ}));
605083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *ZAddr = Builder.CreateInBoundsGEP(Arg_p, ZValueGEP, "Z.gep");
606083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        CalleeArgs.push_back(Builder.CreateLoad(ZAddr, "Z"));
607e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        Bump();
608e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      }
609083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
610083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.restoreIP(OldInsertionPoint);
61133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
61228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
61328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    return Return;
61433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  }
61533cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
616e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Generate loop-invariant input processing setup code for an expanded
617e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // ForEach-able function or an expanded general reduction accumulator
618e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // function.
619e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
620e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // LoopHeader - block at the end of which the setup code will be inserted
621e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Arg_p - RSKernelDriverInfo pointer passed to the expanded function
622e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // TBAAPointer - metadata for marking loads of pointer values out of RSKernelDriverInfo
623e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // ArgIter - iterator pointing to first input of the UNexpanded function
624e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // NumInputs - number of inputs (NOT number of ARGUMENTS)
625e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
626f039d98d346006442b61255a2889b8513a8cd56fYong Chen  // InTypes[] - this function saves input type, they will be used in ExpandInputsBody().
627f039d98d346006442b61255a2889b8513a8cd56fYong Chen  // InBufPtrs[] - this function sets each array element to point to the first cell / byte
628f039d98d346006442b61255a2889b8513a8cd56fYong Chen  //               (byte for x86, cell for other platforms) of the corresponding input allocation
629e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // InStructTempSlots[] - this function sets each array element either to nullptr
630e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       or to the result of an alloca (for the case where the
631e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       calling convention dictates that a value must be passed
632e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       by reference, and so we need a stacked temporary to hold
633e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //                       a copy of that value)
634e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  void ExpandInputsLoopInvariant(llvm::IRBuilder<> &Builder, llvm::BasicBlock *LoopHeader,
635e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::Value *Arg_p,
636e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::MDNode *TBAAPointer,
637e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::Function::arg_iterator ArgIter,
638e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 const size_t NumInputs,
639f039d98d346006442b61255a2889b8513a8cd56fYong Chen                                 llvm::SmallVectorImpl<llvm::Type *> &InTypes,
640e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
641e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                 llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots) {
642e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT);
643e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
644e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Extract information about input slots. The work done
645e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // here is loop-invariant, so we can hoist the operations out of the loop.
646e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    auto OldInsertionPoint = Builder.saveIP();
647e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.SetInsertPoint(LoopHeader->getTerminator());
648e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
649e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    for (size_t InputIndex = 0; InputIndex < NumInputs; ++InputIndex, ArgIter++) {
650e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::Type *InType = ArgIter->getType();
651e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
652e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      /*
653e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * AArch64 calling conventions dictate that structs of sufficient size
654e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * get passed by pointer instead of passed by value.  This, combined
655e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * with the fact that we don't allow kernels to operate on pointer
656e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * data means that if we see a kernel with a pointer parameter we know
657e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * that it is a struct input that has been promoted.  As such we don't
658e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * need to convert its type to a pointer.  Later we will need to know
659e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * to create a temporary copy on the stack, so we save this information
660e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       * in InStructTempSlots.
661e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross       */
662e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) {
663e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        llvm::Type *ElementType = PtrType->getElementType();
664e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr,
665e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                         "input_struct_slot"));
666e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      } else {
667e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InType = InType->getPointerTo();
668e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InStructTempSlots.push_back(nullptr);
669e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
670e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
671e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      SmallGEPIndices InBufPtrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr,
672e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                             static_cast<int32_t>(InputIndex)}));
673e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::Value    *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep");
674e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf");
675f039d98d346006442b61255a2889b8513a8cd56fYong Chen
676f039d98d346006442b61255a2889b8513a8cd56fYong Chen      llvm::Value *CastInBufPtr = nullptr;
677f039d98d346006442b61255a2889b8513a8cd56fYong Chen      if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) {
678f039d98d346006442b61255a2889b8513a8cd56fYong Chen        CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in");
679f039d98d346006442b61255a2889b8513a8cd56fYong Chen      } else {
680f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // The disagreement between module and x86 target machine datalayout
681f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // causes mismatched input/output data offset between slang reflected
682f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // code and bcc codegen for GetElementPtr. To solve this issue, skip the
683f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // cast to InType and leave CastInBufPtr as an int8_t*.  The buffer is
684f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // later indexed with an explicit byte offset computed based on
685f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // X86_CUSTOM_DL_STRING and then bitcast it to actual input type.
686f039d98d346006442b61255a2889b8513a8cd56fYong Chen        CastInBufPtr = InBufPtr;
687f039d98d346006442b61255a2889b8513a8cd56fYong Chen      }
688e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
689e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (gEnableRsTbaa) {
690e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InBufPtr->setMetadata("tbaa", TBAAPointer);
691e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
692e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
693f039d98d346006442b61255a2889b8513a8cd56fYong Chen      InTypes.push_back(InType);
694e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      InBufPtrs.push_back(CastInBufPtr);
695e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
696e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
697e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.restoreIP(OldInsertionPoint);
698e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
699e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
700e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Generate loop-varying input processing code for an expanded ForEach-able function
701e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // or an expanded general reduction accumulator function.  Also, for the call to the
702e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // UNexpanded function, collect the portion of the argument list corresponding to the
703e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // inputs.
704e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
705e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Arg_x1 - first X coordinate to be processed by the expanded function
706e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // TBAAAllocation - metadata for marking loads of input values out of allocations
707e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // NumInputs -- number of inputs (NOT number of ARGUMENTS)
708f039d98d346006442b61255a2889b8513a8cd56fYong Chen  // InTypes[] - this function uses the saved input types in ExpandInputsLoopInvariant()
709f039d98d346006442b61255a2889b8513a8cd56fYong Chen  //             to convert the pointer of byte InPtr to its real type.
710e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // InBufPtrs[] - this function consumes the information produced by ExpandInputsLoopInvariant()
711e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // InStructTempSlots[] - this function consumes the information produced by ExpandInputsLoopInvariant()
712e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // IndVar - value of loop induction variable (X coordinate) for a given loop iteration
713e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
714e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // RootArgs - this function sets this to the list of outgoing argument values corresponding
715e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //            to the inputs
716e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  void ExpandInputsBody(llvm::IRBuilder<> &Builder,
717e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::Value *Arg_x1,
718e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::MDNode *TBAAAllocation,
719e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        const size_t NumInputs,
720f039d98d346006442b61255a2889b8513a8cd56fYong Chen                        const llvm::SmallVectorImpl<llvm::Type *> &InTypes,
721e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        const llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
722e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        const llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots,
723e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::Value *IndVar,
724e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                        llvm::SmallVectorImpl<llvm::Value *> &RootArgs) {
725e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Offset = Builder.CreateSub(IndVar, Arg_x1);
726f039d98d346006442b61255a2889b8513a8cd56fYong Chen    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
727e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
728e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    for (size_t Index = 0; Index < NumInputs; ++Index) {
729e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
730f039d98d346006442b61255a2889b8513a8cd56fYong Chen      llvm::Value *InPtr = nullptr;
731f039d98d346006442b61255a2889b8513a8cd56fYong Chen      if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) {
732f039d98d346006442b61255a2889b8513a8cd56fYong Chen        InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset);
733f039d98d346006442b61255a2889b8513a8cd56fYong Chen      } else {
734f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // Treat x86 input buffer as byte[], get indexed pointer with explicit
735f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // byte offset computed using a datalayout based on
736f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // X86_CUSTOM_DL_STRING, then bitcast it to actual input type.
737f039d98d346006442b61255a2889b8513a8cd56fYong Chen        llvm::DataLayout DL(X86_CUSTOM_DL_STRING);
738f039d98d346006442b61255a2889b8513a8cd56fYong Chen        llvm::Type *InTy = InTypes[Index];
739f039d98d346006442b61255a2889b8513a8cd56fYong Chen        uint64_t InStep = DL.getTypeAllocSize(InTy->getPointerElementType());
740f039d98d346006442b61255a2889b8513a8cd56fYong Chen        llvm::Value *OffsetInBytes = Builder.CreateMul(Offset, llvm::ConstantInt::get(Int32Ty, InStep));
741f039d98d346006442b61255a2889b8513a8cd56fYong Chen        InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], OffsetInBytes);
742f039d98d346006442b61255a2889b8513a8cd56fYong Chen        InPtr = Builder.CreatePointerCast(InPtr, InTy);
743f039d98d346006442b61255a2889b8513a8cd56fYong Chen      }
744f039d98d346006442b61255a2889b8513a8cd56fYong Chen
745f039d98d346006442b61255a2889b8513a8cd56fYong Chen      llvm::Value *Input;
746e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
747e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
748e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (gEnableRsTbaa) {
749e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        InputLoad->setMetadata("tbaa", TBAAAllocation);
750e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
751e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
752e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) {
753e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // Pass a pointer to a temporary on the stack, rather than
754e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // passing a pointer to the original value. We do not want
755e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // the kernel to potentially modify the input data.
756e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
757e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // Note: don't annotate with TBAA, since the kernel might
758e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        // have its own TBAA annotations for the pointer argument.
759e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        Builder.CreateStore(InputLoad, TemporarySlot);
760e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        Input = TemporarySlot;
761e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      } else {
762e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        Input = InputLoad;
763e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      }
764e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
765e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      RootArgs.push_back(Input);
766e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
767e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
768e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
7698ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  /* Performs the actual optimization on a selected function. On success, the
7708ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   * Module will contain a new function of the name "<NAME>.expand" that
7718ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   * invokes <NAME>() in a loop with the appropriate parameters.
7728ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   */
7734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  bool ExpandOldStyleForEach(llvm::Function *Function, uint32_t Signature) {
774bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    ALOGV("Expanding ForEach-able Function %s",
775bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes          Function->getName().str().c_str());
7768ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
7778ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser    if (!Signature) {
778bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      Signature = getRootSignature(Function);
7798ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser      if (!Signature) {
7808ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        // We couldn't determine how to expand this function based on its
7818ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        // function signature.
7828ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        return false;
7838ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser      }
7848ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser    }
7858ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
786bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::DataLayout DL(Module);
787f039d98d346006442b61255a2889b8513a8cd56fYong Chen    if (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING) {
788f039d98d346006442b61255a2889b8513a8cd56fYong Chen      DL.reset(X86_CUSTOM_DL_STRING);
789f039d98d346006442b61255a2889b8513a8cd56fYong Chen    }
7908ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
791bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
7924e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      createEmptyExpandedForEachKernel(Function->getName());
793db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
794bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    /*
795bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * Extract the expanded function's parameters.  It is guaranteed by
796e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross     * createEmptyExpandedForEachKernel that there will be four parameters.
797bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     */
79833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
7994e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
80033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
801bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator ExpandedFunctionArgIter =
802bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      ExpandedFunction->arg_begin();
803db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
804bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
805bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
806bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
8075010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
808bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
809900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *InStep  = nullptr;
810900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutStep = nullptr;
811db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
812db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Construct the actual function body.
813f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar    llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin());
814db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
815cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Collect and construct the arguments for the kernel().
816db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Note that we load any loop-invariant arguments before entering the Loop.
817bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
818db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
819900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Type  *InTy      = nullptr;
820083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::Value *InBufPtr = nullptr;
821d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
822083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 0}));
823083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      llvm::LoadInst *InStepArg  = Builder.CreateLoad(
824083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"), "instep_addr");
825e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
826bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      InTy = (FunctionArgIter++)->getType();
827e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      InStep = getStepValue(&DL, InTy, InStepArg);
828e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
8292b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      InStep->setName("instep");
830e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
831083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices InputAddrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 0}));
832083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      InBufPtr = Builder.CreateLoad(
833083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        Builder.CreateInBoundsGEP(Arg_p, InputAddrGEP, "input_buf.gep"), "input_buf");
834db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
835db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
836900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Type *OutTy = nullptr;
837900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutBasePtr = nullptr;
838d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
839bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      OutTy = (FunctionArgIter++)->getType();
840b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
8412b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      OutStep->setName("outstep");
842083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
843083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
844db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
845db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
846900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *UsrData = nullptr;
847d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
848bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
849083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      llvm::Value *UsrDataPointerAddr = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldUsr);
850083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(UsrDataPointerAddr), UsrDataTy);
851db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      UsrData->setName("UsrData");
852db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
853db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
854083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
8554165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    llvm::Value *IV;
85633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    createLoop(Builder, Arg_x1, Arg_x2, &IV);
857097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
85833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
85928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    const int CalleeArgsContextIdx = ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
860083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                                                            [&FunctionArgIter]() { FunctionArgIter++; },
861083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                                                            LoopHeader->getTerminator());
862db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
863bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(FunctionArgIter == Function->arg_end());
864db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
865cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Populate the actual call to kernel().
866db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::SmallVector<llvm::Value*, 8> RootArgs;
867db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
868900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *InPtr  = nullptr;
869900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutPtr = nullptr;
870db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
871ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    // Calculate the current input and output pointers
87202f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    //
873ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    // We always calculate the input/output pointers with a GEP operating on i8
87402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // values and only cast at the very end to OutTy. This is because the step
87502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // between two values is given in bytes.
87602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    //
87702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // TODO: We could further optimize the output by using a GEP operation of
87802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // type 'OutTy' in cases where the element type of the allocation allows.
87902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    if (OutBasePtr) {
88002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
88102f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      OutOffset = Builder.CreateMul(OutOffset, OutStep);
882083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutPtr = Builder.CreateInBoundsGEP(OutBasePtr, OutOffset);
88302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
88402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    }
885bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
886083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (InBufPtr) {
887ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
888ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      InOffset = Builder.CreateMul(InOffset, InStep);
889083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      InPtr = Builder.CreateInBoundsGEP(InBufPtr, InOffset);
890ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      InPtr = Builder.CreatePointerCast(InPtr, InTy);
891ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    }
89202f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser
893ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    if (InPtr) {
8947ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      RootArgs.push_back(InPtr);
895db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
896db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
89702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    if (OutPtr) {
8987ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      RootArgs.push_back(OutPtr);
899db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
900db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
901db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (UsrData) {
902db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      RootArgs.push_back(UsrData);
903db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
904db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
90528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
906db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
907bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    Builder.CreateCall(Function, RootArgs);
908db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
9097ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    return true;
9107ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines  }
9117ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
9124e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  /* Expand a pass-by-value foreach kernel.
9137ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines   */
9144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  bool ExpandForEach(llvm::Function *Function, uint32_t Signature) {
915d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
916bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
9177ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
9184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // TODO: Refactor this to share functionality with ExpandOldStyleForEach.
919bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::DataLayout DL(Module);
920f039d98d346006442b61255a2889b8513a8cd56fYong Chen    if (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING) {
921f039d98d346006442b61255a2889b8513a8cd56fYong Chen      DL.reset(X86_CUSTOM_DL_STRING);
922f039d98d346006442b61255a2889b8513a8cd56fYong Chen    }
923f039d98d346006442b61255a2889b8513a8cd56fYong Chen    llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
9247ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
925bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
9264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      createEmptyExpandedForEachKernel(Function->getName());
9277ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
928bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    /*
929bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * Extract the expanded function's parameters.  It is guaranteed by
930e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross     * createEmptyExpandedForEachKernel that there will be four parameters.
931bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     */
932881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
9334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
934881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
935bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator ExpandedFunctionArgIter =
936bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      ExpandedFunction->arg_begin();
937bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
938bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
939bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
940bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
9413bc475b206c3fa249a212b90fe989fdcda4d75f9Matt Wala    // Arg_outstep is not used by expanded new-style forEach kernels.
9427ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
9437ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // Construct the actual function body.
944f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar    llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin());
9457ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
94618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Create TBAA meta-data.
947354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
948354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines                 *TBAAAllocation, *TBAAPointer;
949bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDBuilder MDHelper(*Context);
95014588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien
951354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScriptDistinct =
9524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      MDHelper.createTBAARoot(kRenderScriptTBAARootName);
9534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
954354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines        TBAARenderScriptDistinct);
955e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
956e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                       TBAARenderScript);
957e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
958e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                      TBAAAllocation, 0);
959e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
960e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                    TBAARenderScript);
96114588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
96218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
963881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    /*
964881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     * Collect and construct the arguments for the kernel().
965881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     *
966881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     * Note that we load any loop-invariant arguments before entering the Loop.
967881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     */
968083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    size_t NumRemainingInputs = Function->arg_size();
9697ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
970881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // No usrData parameter on kernels.
971881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    bccAssert(
972881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
973881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
974881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    llvm::Function::arg_iterator ArgIter = Function->arg_begin();
975881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
976881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Check the return type
977bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Type     *OutTy            = nullptr;
978bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::LoadInst *OutBasePtr       = nullptr;
979bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Value    *CastedOutBasePtr = nullptr;
980881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
981e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    bool PassOutByPointer = false;
982881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
983d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
984bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *OutBaseTy = Function->getReturnType();
985881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
98674a4b08235990916911b8fe758d656c1171faf26Stephen Hines      if (OutBaseTy->isVoidTy()) {
987e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes        PassOutByPointer = true;
988881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        OutTy = ArgIter->getType();
989881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
990881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        ArgIter++;
991083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        --NumRemainingInputs;
99274a4b08235990916911b8fe758d656c1171faf26Stephen Hines      } else {
99374a4b08235990916911b8fe758d656c1171faf26Stephen Hines        // We don't increment Args, since we are using the actual return type.
994881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        OutTy = OutBaseTy->getPointerTo();
99574a4b08235990916911b8fe758d656c1171faf26Stephen Hines      }
996881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
997083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
998083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
999097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
10009c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      if (gEnableRsTbaa) {
10019c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines        OutBasePtr->setMetadata("tbaa", TBAAPointer);
10029c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
100350f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray
1004f039d98d346006442b61255a2889b8513a8cd56fYong Chen      if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) {
1005f039d98d346006442b61255a2889b8513a8cd56fYong Chen        CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
1006f039d98d346006442b61255a2889b8513a8cd56fYong Chen      } else {
1007f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // The disagreement between module and x86 target machine datalayout
1008f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // causes mismatched input/output data offset between slang reflected
1009f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // code and bcc codegen for GetElementPtr. To solve this issue, skip the
1010f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // cast to OutTy and leave CastedOutBasePtr as an int8_t*.  The buffer
1011f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // is later indexed with an explicit byte offset computed based on
1012f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // X86_CUSTOM_DL_STRING and then bitcast it to actual output type.
1013f039d98d346006442b61255a2889b8513a8cd56fYong Chen        CastedOutBasePtr = OutBasePtr;
1014f039d98d346006442b61255a2889b8513a8cd56fYong Chen      }
101574a4b08235990916911b8fe758d656c1171faf26Stephen Hines    }
101674a4b08235990916911b8fe758d656c1171faf26Stephen Hines
1017f039d98d346006442b61255a2889b8513a8cd56fYong Chen    llvm::SmallVector<llvm::Type*,  8> InTypes;
1018083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
1019d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala    llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
1020881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1021083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    bccAssert(NumRemainingInputs <= RS_KERNEL_INPUT_LIMIT);
1022881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1023083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // Create the loop structure.
1024083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
10254165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    llvm::Value *IV;
1026083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    createLoop(Builder, Arg_x1, Arg_x2, &IV);
1027881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1028083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
1029083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    const int CalleeArgsContextIdx =
1030083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
1031083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             [&NumRemainingInputs]() { --NumRemainingInputs; },
1032083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             LoopHeader->getTerminator());
1033083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
1034083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // After ExpandSpecialArguments() gets called, NumRemainingInputs
1035083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // counts the number of arguments to the kernel that correspond to
1036083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // an array entry from the InPtr field of the DriverInfo
1037083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // structure.
1038083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    const size_t NumInPtrArguments = NumRemainingInputs;
1039083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
1040083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (NumInPtrArguments > 0) {
1041e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, ArgIter, NumInPtrArguments,
1042f039d98d346006442b61255a2889b8513a8cd56fYong Chen                                InTypes, InBufPtrs, InStructTempSlots);
1043881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    }
10447ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
10457ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // Populate the actual call to kernel().
10467ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    llvm::SmallVector<llvm::Value*, 8> RootArgs;
10477ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
10489296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala    // Calculate the current input and output pointers.
1049881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1050881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Output
1051881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
1052900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutPtr = nullptr;
1053bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    if (CastedOutBasePtr) {
10547b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
1055f039d98d346006442b61255a2889b8513a8cd56fYong Chen
1056f039d98d346006442b61255a2889b8513a8cd56fYong Chen      if (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING) {
1057f039d98d346006442b61255a2889b8513a8cd56fYong Chen        OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset);
1058f039d98d346006442b61255a2889b8513a8cd56fYong Chen      } else {
1059f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // Treat x86 output buffer as byte[], get indexed pointer with explicit
1060f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // byte offset computed using a datalayout based on
1061f039d98d346006442b61255a2889b8513a8cd56fYong Chen        // X86_CUSTOM_DL_STRING, then bitcast it to actual output type.
1062f039d98d346006442b61255a2889b8513a8cd56fYong Chen        uint64_t OutStep = DL.getTypeAllocSize(OutTy->getPointerElementType());
1063f039d98d346006442b61255a2889b8513a8cd56fYong Chen        llvm::Value *OutOffsetInBytes = Builder.CreateMul(OutOffset, llvm::ConstantInt::get(Int32Ty, OutStep));
1064f039d98d346006442b61255a2889b8513a8cd56fYong Chen        OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffsetInBytes);
1065f039d98d346006442b61255a2889b8513a8cd56fYong Chen        OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
1066f039d98d346006442b61255a2889b8513a8cd56fYong Chen      }
1067bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
1068e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      if (PassOutByPointer) {
1069881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        RootArgs.push_back(OutPtr);
1070881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes      }
10714102bec56151fb5d9c962fb298412f34a6eacaa8Tobias Grosser    }
10727b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser
1073881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Inputs
107474a4b08235990916911b8fe758d656c1171faf26Stephen Hines
1075083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (NumInPtrArguments > 0) {
1076e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInPtrArguments,
1077f039d98d346006442b61255a2889b8513a8cd56fYong Chen                       InTypes, InBufPtrs, InStructTempSlots, IV, RootArgs);
10787ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
10797ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
108028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
10817ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1082bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
10837ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1084e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    if (OutPtr && !PassOutByPointer) {
10859296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala      RetVal->setName("call.result");
108618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
10879c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      if (gEnableRsTbaa) {
10889c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines        Store->setMetadata("tbaa", TBAAAllocation);
10899c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
10907ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
10917ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1092db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return true;
1093db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1094db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1095e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Certain categories of functions that make up a general
1096e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // reduce-style kernel are called directly from the driver with no
1097e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // expansion needed.  For a function in such a category, we need to
1098e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // promote linkage from static to external, to ensure that the
1099e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // function is visible to the driver in the dynamic symbol table.
1100e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // This promotion is safe because we don't have any kind of cross
1101e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // translation unit linkage model (except for linking against
1102e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // RenderScript libraries), so we do not risk name clashes.
11039fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross  bool PromoteReduceFunction(const char *Name, FunctionSet &PromotedFunctions) {
1104e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    if (!Name)  // a presumably-optional function that is not present
1105e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      return false;
1106e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1107e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function *Fn = Module->getFunction(Name);
1108e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    bccAssert(Fn != nullptr);
1109e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    if (PromotedFunctions.insert(Fn).second) {
1110e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      bccAssert(Fn->getLinkage() == llvm::GlobalValue::InternalLinkage);
1111e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      Fn->setLinkage(llvm::GlobalValue::ExternalLinkage);
1112e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      return true;
1113e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
1114e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1115e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    return false;
1116e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
1117e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1118e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // Expand the accumulator function for a general reduce-style kernel.
1119e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1120e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // The input is a function of the form
1121e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1122e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   define void @func(accumType* %accum, foo1 in1[, ... fooN inN] [, special arguments])
1123e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1124e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // where all arguments except the first are the same as for a foreach kernel.
1125e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1126e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // The input accumulator function gets expanded into a function of the form
1127e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1128e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   define void @func.expand(%RsExpandKernelDriverInfoPfx* %p, i32 %x1, i32 %x2, accumType* %accum)
1129e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1130e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // which performs a serial accumulaion of elements [x1, x2) into *%accum.
1131e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1132e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // In pseudocode, @func.expand does:
1133e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1134e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   for (i = %x1; i < %x2; ++i) {
1135e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //     func(%accum,
1136e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //          *((foo1 *)p->inPtr[0] + i)[, ... *((fooN *)p->inPtr[N-1] + i)
1137e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //          [, p] [, i] [, p->current.y] [, p->current.z]);
1138e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //   }
1139e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  //
1140e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  // This is very similar to foreach kernel expansion with no output.
11419fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross  bool ExpandReduceAccumulator(llvm::Function *FnAccumulator, uint32_t Signature, size_t NumInputs) {
1142e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    ALOGV("Expanding accumulator %s for general reduce kernel",
1143e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross          FnAccumulator->getName().str().c_str());
1144e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1145e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Create TBAA meta-data.
1146e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
1147e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                 *TBAAAllocation, *TBAAPointer;
1148e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::MDBuilder MDHelper(*Context);
1149e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAARenderScriptDistinct =
1150e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      MDHelper.createTBAARoot(kRenderScriptTBAARootName);
1151e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
1152e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        TBAARenderScriptDistinct);
1153e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
1154e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                       TBAARenderScript);
1155e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
1156e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                      TBAAAllocation, 0);
1157e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
1158e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                                                    TBAARenderScript);
1159e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
1160e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1161e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    auto AccumulatorArgIter = FnAccumulator->arg_begin();
1162e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1163e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Create empty accumulator function.
1164e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Function *FnExpandedAccumulator =
11659fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross        createEmptyExpandedReduceAccumulator(FnAccumulator->getName(),
11669fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross                                             (AccumulatorArgIter++)->getType());
1167e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1168e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Extract the expanded accumulator's parameters.  It is
11699fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross    // guaranteed by createEmptyExpandedReduceAccumulator that
1170e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // there will be 4 parameters.
11719fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross    bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams);
1172e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    auto ExpandedAccumulatorArgIter = FnExpandedAccumulator->arg_begin();
1173e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_p     = &*(ExpandedAccumulatorArgIter++);
1174e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_x1    = &*(ExpandedAccumulatorArgIter++);
1175e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_x2    = &*(ExpandedAccumulatorArgIter++);
1176e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::Value *Arg_accum = &*(ExpandedAccumulatorArgIter++);
1177e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1178e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Construct the actual function body.
1179f229c40f0d2da19985e68955aef1a06ce4121e63Pirama Arumuga Nainar    llvm::IRBuilder<> Builder(&*FnExpandedAccumulator->getEntryBlock().begin());
1180e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1181e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Create the loop structure.
1182e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
11834165d29822fc7caf81e435995ff6189608fc0323Dean De Leo    llvm::Value *IndVar;
1184e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    createLoop(Builder, Arg_x1, Arg_x2, &IndVar);
1185e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1186e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
1187e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    const int CalleeArgsContextIdx =
1188e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross        ExpandSpecialArguments(Signature, IndVar, Arg_p, Builder, CalleeArgs,
1189e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                               [](){}, LoopHeader->getTerminator());
1190e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1191f039d98d346006442b61255a2889b8513a8cd56fYong Chen    llvm::SmallVector<llvm::Type*,  8> InTypes;
1192e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
1193e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
1194e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, AccumulatorArgIter, NumInputs,
1195f039d98d346006442b61255a2889b8513a8cd56fYong Chen                              InTypes, InBufPtrs, InStructTempSlots);
1196e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1197e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Populate the actual call to the original accumulator.
1198e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    llvm::SmallVector<llvm::Value*, 8> RootArgs;
1199e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    RootArgs.push_back(Arg_accum);
1200f039d98d346006442b61255a2889b8513a8cd56fYong Chen    ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInputs, InTypes, InBufPtrs, InStructTempSlots,
1201e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross                     IndVar, RootArgs);
1202e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *FnAccumulator, Builder);
1203e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    Builder.CreateCall(FnAccumulator, RootArgs);
1204e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1205e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    return true;
1206e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross  }
1207e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1208dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  // Create a combiner function for a general reduce-style kernel that lacks one,
1209dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  // by calling the accumulator function.
1210dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //
1211dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  // The accumulator function must be of the form
1212dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //
1213dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //   define void @accumFn(accumType* %accum, accumType %in)
1214dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //
1215dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  // A combiner function will be generated of the form
1216dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //
1217dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //   define void @accumFn.combiner(accumType* %accum, accumType* %other) {
1218dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //     %1 = load accumType, accumType* %other
1219dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //     call void @accumFn(accumType* %accum, accumType %1);
1220dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  //   }
12219fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross  bool CreateReduceCombinerFromAccumulator(llvm::Function *FnAccumulator) {
1222dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    ALOGV("Creating combiner from accumulator %s for general reduce kernel",
1223dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross          FnAccumulator->getName().str().c_str());
1224dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1225dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    using llvm::Attribute;
1226dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1227dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    bccAssert(FnAccumulator->arg_size() == 2);
1228dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    auto AccumulatorArgIter = FnAccumulator->arg_begin();
1229dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::Value *AccumulatorArg_accum = &*(AccumulatorArgIter++);
1230dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::Value *AccumulatorArg_in    = &*(AccumulatorArgIter++);
1231dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::Type *AccumulatorArgType = AccumulatorArg_accum->getType();
1232dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    bccAssert(AccumulatorArgType->isPointerTy());
1233dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1234dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
1235dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::FunctionType *CombinerType =
1236dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross        llvm::FunctionType::get(VoidTy, { AccumulatorArgType, AccumulatorArgType }, false);
1237dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::Function *FnCombiner =
1238dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross        llvm::Function::Create(CombinerType, llvm::GlobalValue::ExternalLinkage,
12399fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross                               nameReduceCombinerFromAccumulator(FnAccumulator->getName()),
1240dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross                               Module);
1241dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1242dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    auto CombinerArgIter = FnCombiner->arg_begin();
1243dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1244dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::Argument *CombinerArg_accum = &(*CombinerArgIter++);
1245dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    CombinerArg_accum->setName("accum");
1246dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    CombinerArg_accum->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_accum->getArgNo() + 1,
1247dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross                                                       llvm::makeArrayRef(Attribute::NoCapture)));
1248dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1249dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::Argument *CombinerArg_other = &(*CombinerArgIter++);
1250dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    CombinerArg_other->setName("other");
1251dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    CombinerArg_other->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_other->getArgNo() + 1,
1252dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross                                                       llvm::makeArrayRef(Attribute::NoCapture)));
1253dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1254dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::BasicBlock *BB = llvm::BasicBlock::Create(*Context, "BB", FnCombiner);
1255dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    llvm::IRBuilder<> Builder(BB);
1256dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1257dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    if (AccumulatorArg_in->getType()->isPointerTy()) {
1258dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      // Types of sufficient size get passed by pointer-to-copy rather
1259dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      // than passed by value.  An accumulator cannot take a pointer
1260dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      // at the user level; so if we see a pointer here, we know that
1261dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      // we have a pass-by-pointer-to-copy case.
1262dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      llvm::Type *ElementType = AccumulatorArg_in->getType()->getPointerElementType();
1263dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      llvm::Value *TempMem = Builder.CreateAlloca(ElementType, nullptr, "caller_copy");
1264dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      Builder.CreateStore(Builder.CreateLoad(CombinerArg_other), TempMem);
1265dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      Builder.CreateCall(FnAccumulator, { CombinerArg_accum, TempMem });
1266dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    } else {
1267dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      llvm::Value *TypeAdjustedOther = CombinerArg_other;
1268dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      if (AccumulatorArgType->getPointerElementType() != AccumulatorArg_in->getType()) {
1269dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross        // Call lowering by frontend has done some type coercion
1270dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross        TypeAdjustedOther = Builder.CreatePointerCast(CombinerArg_other,
1271dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross                                                      AccumulatorArg_in->getType()->getPointerTo(),
1272dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross                                                      "cast");
1273dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      }
1274dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      llvm::Value *DerefOther = Builder.CreateLoad(TypeAdjustedOther);
1275dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      Builder.CreateCall(FnAccumulator, { CombinerArg_accum, DerefOther });
1276dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    }
1277dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    Builder.CreateRetVoid();
1278dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
1279dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    return true;
1280dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross  }
1281dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross
128218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// @brief Checks if pointers to allocation internals are exposed
128318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  ///
128418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// This function verifies if through the parameters passed to the kernel
128518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// or through calls to the runtime library the script gains access to
128618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// pointers pointing to data within a RenderScript Allocation.
128718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// If we know we control all loads from and stores to data within
128818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// RenderScript allocations and if we know the run-time internal accesses
128918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// are all annotated with RenderScript TBAA metadata, only then we
129018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// can safely use TBAA to distinguish between generic and from-allocation
129118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// pointers.
1292bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  bool allocPointersExposed(llvm::Module &Module) {
129318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Old style kernel function can expose pointers to elements within
129418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // allocations.
129518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // TODO: Extend analysis to allow simple cases of old-style kernels.
129625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    for (size_t i = 0; i < mExportForEachCount; ++i) {
129725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      const char *Name = mExportForEachNameList[i];
129825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      uint32_t Signature = mExportForEachSignatureList[i];
1299bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (Module.getFunction(Name) &&
1300d88177580db4ddedf680854c51db333c97eabc59Stephen Hines          !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
130118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
130218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
130318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
130418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
130518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Check for library functions that expose a pointer to an Allocation or
130618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // that are not yet annotated with RenderScript-specific tbaa information.
1307e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    static const std::vector<const char *> Funcs{
1308e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAt(...)
1309e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationj",
1310e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationjj",
1311e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationjjj",
1312e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1313e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsSetElementAt()
1314e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvj",
1315e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvjj",
1316e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvjjj",
1317e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1318e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_Y()
1319e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj",
1320e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1321e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_U()
1322e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj",
1323e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1324e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_V()
1325e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj",
1326e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    };
1327e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1328e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    for (auto FI : Funcs) {
1329e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      llvm::Function *Function = Module.getFunction(FI);
133018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1331bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (!Function) {
1332e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala        ALOGE("Missing run-time function '%s'", FI);
133318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
133418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
133518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1336bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (Function->getNumUses() > 0) {
133718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
133818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
133918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
134018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
134118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    return false;
134218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  }
134318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
134418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
134518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  ///
134618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// The TBAA metadata used to annotate loads/stores from RenderScript
1347e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// Allocations is generated in a separate TBAA tree with a
1348354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for
1349354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// all nodes in unrelated alias analysis trees. This function makes the
1350354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root),
1351e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With
1352e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// the connected trees every access to an Allocation is resolved to
1353e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// must-alias if compared to a normal C/C++ access.
1354bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
1355bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDBuilder MDHelper(*Context);
1356354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScriptDistinct =
1357354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines      MDHelper.createTBAARoot("RenderScript Distinct TBAA");
1358354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode(
1359354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines        "RenderScript TBAA", TBAARenderScriptDistinct);
1360bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
1361354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScript->replaceOperandWith(1, TBAARoot);
136218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  }
136318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1364bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  virtual bool runOnModule(llvm::Module &Module) {
1365bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bool Changed  = false;
1366bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    this->Module  = &Module;
13674e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    Context = &Module.getContext();
1368bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
13694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    buildTypes();
1370bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
1371bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bcinfo::MetadataExtractor me(&Module);
137225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    if (!me.extract()) {
137325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      ALOGE("Could not extract metadata from module!");
137425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      return false;
137525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    }
13764e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
13774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Expand forEach_* style kernels.
137825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachCount = me.getExportForEachSignatureCount();
137925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachNameList = me.getExportForEachNameList();
138025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachSignatureList = me.getExportForEachSignatureList();
1381db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
138225eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    for (size_t i = 0; i < mExportForEachCount; ++i) {
138325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      const char *name = mExportForEachNameList[i];
138425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      uint32_t signature = mExportForEachSignatureList[i];
1385bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Function *kernel = Module.getFunction(name);
1386cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser      if (kernel) {
1387d88177580db4ddedf680854c51db333c97eabc59Stephen Hines        if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
13884e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala          Changed |= ExpandForEach(kernel, signature);
1389acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1390acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        } else if (kernel->getReturnType()->isVoidTy()) {
13914e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala          Changed |= ExpandOldStyleForEach(kernel, signature);
1392acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1393acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        } else {
1394acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // There are some graphics root functions that are not
1395acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // expanded, but that will be called directly. For those
1396acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // functions, we can not set the linkage to internal.
1397acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        }
1398cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines      }
1399db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
1400db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1401e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    // Process general reduce_* style functions.
14029fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross    const size_t ExportReduceCount = me.getExportReduceCount();
14039fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross    const bcinfo::MetadataExtractor::Reduce *ExportReduceList = me.getExportReduceList();
1404e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    //   Note that functions can be shared between kernels
1405dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross    FunctionSet PromotedFunctions, ExpandedAccumulators, AccumulatorsForCombiners;
1406e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
14079fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross    for (size_t i = 0; i < ExportReduceCount; ++i) {
14089fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross      Changed |= PromoteReduceFunction(ExportReduceList[i].mInitializerName, PromotedFunctions);
14099fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross      Changed |= PromoteReduceFunction(ExportReduceList[i].mCombinerName, PromotedFunctions);
14109fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross      Changed |= PromoteReduceFunction(ExportReduceList[i].mOutConverterName, PromotedFunctions);
1411e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
1412e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      // Accumulator
14139fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross      llvm::Function *accumulator = Module.getFunction(ExportReduceList[i].mAccumulatorName);
1414e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      bccAssert(accumulator != nullptr);
1415e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross      if (ExpandedAccumulators.insert(accumulator).second)
14169fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross        Changed |= ExpandReduceAccumulator(accumulator,
14179fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross                                           ExportReduceList[i].mSignature,
14189fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross                                           ExportReduceList[i].mInputCount);
14199fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross      if (!ExportReduceList[i].mCombinerName) {
1420dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross        if (AccumulatorsForCombiners.insert(accumulator).second)
14219fa4d4480252ecfe08c97bc35888360b1e19ec99David Gross          Changed |= CreateReduceCombinerFromAccumulator(accumulator);
1422dd33eb89cfb81b152ae39f9afadb3c73a00fd46aDavid Gross      }
1423e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross    }
1424e32af52d4be2bb80783404d99fa338b1143dbc9aDavid Gross
14254e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (gEnableRsTbaa && !allocPointersExposed(Module)) {
1426bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      connectRenderScriptTBAAMetadata(Module);
142718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
142818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1429cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    return Changed;
1430db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1431db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1432db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  virtual const char *getPassName() const {
14334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    return "forEach_* and reduce_* function expansion";
1434db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1435db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
14364e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala}; // end RSKernelExpandPass
1437db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
14387a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace
14397a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
14404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walachar RSKernelExpandPass::ID = 0;
14414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walastatic llvm::RegisterPass<RSKernelExpandPass> X("kernelexp", "Kernel Expand Pass");
1442db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1443db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace bcc {
1444db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
14454165d29822fc7caf81e435995ff6189608fc0323Dean De Leoconst char BCC_INDEX_VAR_NAME[] = "rsIndex";
14464165d29822fc7caf81e435995ff6189608fc0323Dean De Leo
14477a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass *
14484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt WalacreateRSKernelExpandPass(bool pEnableStepOpt) {
14494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  return new RSKernelExpandPass(pEnableStepOpt);
14507a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao}
1451db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
14527a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc
1453