RSKernelExpand.cpp revision 4e7a50685ae18a24087f6f2a51c604e71fab69e2
1db169187dea4602e4ad32058762d23d474753fd0Stephen Hines/*
2db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Copyright 2012, The Android Open Source Project
3db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
4db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Licensed under the Apache License, Version 2.0 (the "License");
5db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * you may not use this file except in compliance with the License.
6db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * You may obtain a copy of the License at
7db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
8db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *     http://www.apache.org/licenses/LICENSE-2.0
9db169187dea4602e4ad32058762d23d474753fd0Stephen Hines *
10db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * Unless required by applicable law or agreed to in writing, software
11db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * distributed under the License is distributed on an "AS IS" BASIS,
12db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * See the License for the specific language governing permissions and
14db169187dea4602e4ad32058762d23d474753fd0Stephen Hines * limitations under the License.
15db169187dea4602e4ad32058762d23d474753fd0Stephen Hines */
16db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
176e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines#include "bcc/Assert.h"
18e198abec6c5e3eab380ccf6897b0a0b9c2dd92ddStephen Hines#include "bcc/Renderscript/RSTransforms.h"
197a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
207a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao#include <cstdlib>
2133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross#include <functional>
227a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
23b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DerivedTypes.h>
24b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Function.h>
25b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Instructions.h>
26b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/IRBuilder.h>
2718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser#include <llvm/IR/MDBuilder.h>
28b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Module.h>
29c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include <llvm/Pass.h>
307ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines#include <llvm/Support/raw_ostream.h>
31b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/DataLayout.h>
32cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser#include <llvm/IR/Function.h>
33b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines#include <llvm/IR/Type.h>
34806075b3a54af826fea78490fb213d8a0784138eTobias Grosser#include <llvm/Transforms/Utils/BasicBlockUtils.h>
35c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang
36c72c4ddfcd79c74f70713da91a69569451b5c19eZonr Chang#include "bcc/Config/Config.h"
37ef73a242762bcd8113b9b65ceccbe7d909b5acbcZonr Chang#include "bcc/Support/Log.h"
38db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
39d88177580db4ddedf680854c51db333c97eabc59Stephen Hines#include "bcinfo/MetadataExtractor.h"
40d88177580db4ddedf680854c51db333c97eabc59Stephen Hines
#ifndef __DISABLE_ASSERTS
// Expected argument counts of the expanded foreach / reduce kernels.
// These are referenced only from bccAssert() checks, so they are compiled
// out together with the assertions.
constexpr int kNumExpandedForeachParams = 4;
constexpr int kNumExpandedReduceParams = 3;
#endif

// Names used for the RenderScript TBAA metadata nodes.
constexpr char kRenderScriptTBAARootName[] = "RenderScript Distinct TBAA";
constexpr char kRenderScriptTBAANodeName[] = "RenderScript TBAA";
49bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
507a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaousing namespace bcc;
517a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
52db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace {
537a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
54354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hinesstatic const bool gEnableRsTbaa = true;
559c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines
564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala/* RSKernelExpandPass - This pass operates on functions that are able
574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * to be called via rsForEach(), "foreach_<NAME>", or
584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * "reduce_<NAME>". We create an inner loop for the function to be
594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * invoked over the appropriate data cells of the input/output
604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * allocations (adjusting other relevant parameters as we go). We
614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * support doing this for any forEach or reduce style compute
624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * kernels. The new function name is the original function name
634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * followed by ".expand". Note that we still generate code for the
644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala * original function.
657a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao */
664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walaclass RSKernelExpandPass : public llvm::ModulePass {
6733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grosspublic:
68db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  static char ID;
69db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
7033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Grossprivate:
71e44a3525b9703739534c3b62d7d1af4c95649a38David Gross  static const size_t RS_KERNEL_INPUT_LIMIT = 8; // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h
72e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
// Names for the members of the "RsLaunchDimensions" struct type. The
// enumerators are listed in the same order in which buildTypes() adds the
// corresponding member types, and the trailing *Count enumerator sizes the
// member-type list there.
enum RsLaunchDimensionsField {
  RsLaunchDimensionsFieldX = 0,   // uint32_t x
  RsLaunchDimensionsFieldY,       // uint32_t y
  RsLaunchDimensionsFieldZ,       // uint32_t z
  RsLaunchDimensionsFieldLod,     // uint32_t lod
  RsLaunchDimensionsFieldFace,    // uint32_t face
  RsLaunchDimensionsFieldArray,   // uint32_t array[4]

  RsLaunchDimensionsFieldCount    // number of members; keep last
};
83e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
// Names for the members of the "RsExpandKernelDriverInfoPfx" struct type.
// The enumerators are listed in the same order in which buildTypes() adds
// the corresponding member types, and the trailing *Count enumerator sizes
// the member-type list there.
enum RsExpandKernelDriverInfoPfxField {
  RsExpandKernelDriverInfoPfxFieldInPtr = 0,   // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]
  RsExpandKernelDriverInfoPfxFieldInStride,    // uint32_t inStride[RS_KERNEL_INPUT_LIMIT]
  RsExpandKernelDriverInfoPfxFieldInLen,       // uint32_t inLen
  RsExpandKernelDriverInfoPfxFieldOutPtr,      // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]
  RsExpandKernelDriverInfoPfxFieldOutStride,   // uint32_t outStride[RS_KERNEL_INPUT_LIMIT]
  RsExpandKernelDriverInfoPfxFieldOutLen,      // uint32_t outLen
  RsExpandKernelDriverInfoPfxFieldDim,         // RsLaunchDimensions dim
  RsExpandKernelDriverInfoPfxFieldCurrent,     // RsLaunchDimensions current
  RsExpandKernelDriverInfoPfxFieldUsr,         // const void *usr
  // NOTE(review): "UsLenr" looks like a misspelling of "UsrLen"; not renamed
  // here because other code may refer to this enumerator.
  RsExpandKernelDriverInfoPfxFieldUsLenr,      // uint32_t usrLen

  RsExpandKernelDriverInfoPfxFieldCount        // number of members; keep last
};
9833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
99bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::Module *Module;
100bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::LLVMContext *Context;
101bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
/*
 * Pointers to LLVM type information for the function signatures
 * for expanded functions. These must be re-calculated for each module
 * the pass is run on.
 */
1074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  llvm::FunctionType *ExpandedForEachType, *ExpandedReduceType;
108db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
10925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  uint32_t mExportForEachCount;
11025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  const char **mExportForEachNameList;
11125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines  const uint32_t *mExportForEachSignatureList;
112cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines
1134e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  uint32_t mExportReduceCount;
1144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  const char **mExportReduceNameList;
1154e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
1162b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // Turns on optimization of allocation stride values.
1172b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  bool mEnableStepOpt;
1182b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
119bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  uint32_t getRootSignature(llvm::Function *Function) {
120db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    const llvm::NamedMDNode *ExportForEachMetadata =
121bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes        Module->getNamedMetadata("#rs_export_foreach");
122db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
123db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (!ExportForEachMetadata) {
124db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      llvm::SmallVector<llvm::Type*, 8> RootArgTys;
125bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      for (llvm::Function::arg_iterator B = Function->arg_begin(),
126bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                                        E = Function->arg_end();
127db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           B != E;
128db169187dea4602e4ad32058762d23d474753fd0Stephen Hines           ++B) {
129db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        RootArgTys.push_back(B->getType());
130db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
131db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
132db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // For pre-ICS bitcode, we may not have signature information. In that
133db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // case, we use the size of the RootArgTys to select the number of
134db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      // arguments.
135db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      return (1 << RootArgTys.size()) - 1;
136db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
137db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1387ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    if (ExportForEachMetadata->getNumOperands() == 0) {
1397ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      return 0;
1407ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
1417ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
1426e9e89d1ed049ec931bb1000948c12698e6c5484Stephen Hines    bccAssert(ExportForEachMetadata->getNumOperands() > 0);
143db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
144cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // We only handle the case for legacy root() functions here, so this is
145cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // hard-coded to look at only the first such function.
146db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
147900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    if (SigNode != nullptr && SigNode->getNumOperands() == 1) {
1481bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines      llvm::Metadata *SigMD = SigNode->getOperand(0);
1491bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines      if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) {
1501bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines        llvm::StringRef SigString = SigS->getString();
151db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        uint32_t Signature = 0;
152db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        if (SigString.getAsInteger(10, Signature)) {
153db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
154db169187dea4602e4ad32058762d23d474753fd0Stephen Hines          return 0;
155db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        }
156db169187dea4602e4ad32058762d23d474753fd0Stephen Hines        return Signature;
157db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      }
158db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
159db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
160db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return 0;
161db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
162db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
163429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray  bool isStepOptSupported(llvm::Type *AllocType) {
164429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
165429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
166429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
167429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
168429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (mEnableStepOpt) {
169429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
170429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
171429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
172429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType == VoidPtrTy) {
173429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
174429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
175429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
176429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (!PT) {
177429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
178429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
179429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
180429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // remaining conditions are 64-bit only
181429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (VoidPtrTy->getPrimitiveSizeInBits() == 32) {
182429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return true;
183429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
184429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
185429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // coerce suggests an upconverted struct type, which we can't support
186429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) {
187429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
188429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
189429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
190429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported
191429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2);
192429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128);
193429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (AllocType == V2xi64Ty || AllocType == Int128Ty) {
194429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray      return false;
195429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    }
196429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
197429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    return true;
198429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray  }
199429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray
2002b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // Get the actual value we should use to step through an allocation.
2017b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  //
2027b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // Normally the value we use to step through an allocation is given to us by
2037b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // the driver. However, for certain primitive data types, we can derive an
2047b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // integer constant for the step value. We use this integer constant whenever
2057b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  // possible to allow further compiler optimizations to take place.
2067b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser  //
207b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines  // DL - Target Data size/layout information.
2082b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // T - Type of allocation (should be a pointer).
2092b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  // OrigStep - Original step increment (root.expand() input from driver).
210bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
2112b04086acbef6520ae2c54a868b1271abf053122Stephen Hines                            llvm::Value *OrigStep) {
212b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines    bccAssert(DL);
213bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(AllocType);
2142b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    bccAssert(OrigStep);
215bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
216429d94acbc64480d4f75233b66b4824cdc8b19c0Tim Murray    if (isStepOptSupported(AllocType)) {
2172b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      llvm::Type *ET = PT->getElementType();
218b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      uint64_t ETSize = DL->getTypeAllocSize(ET);
219bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
2202b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return llvm::ConstantInt::get(Int32Ty, ETSize);
2212b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    } else {
2222b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      return OrigStep;
2232b04086acbef6520ae2c54a868b1271abf053122Stephen Hines    }
2242b04086acbef6520ae2c54a868b1271abf053122Stephen Hines  }
2252b04086acbef6520ae2c54a868b1271abf053122Stephen Hines
226097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes  /// Builds the types required by the pass for the given context.
227bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  void buildTypes(void) {
228e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs.
229bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
230e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8Ty                   = llvm::Type::getInt8Ty(*Context);
231e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8PtrTy                = Int8Ty->getPointerTo();
232e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT);
233e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32Ty                  = llvm::Type::getInt32Ty(*Context);
234e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32ArrayInputLimitTy   = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT);
235e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *VoidPtrTy                = llvm::Type::getInt8PtrTy(*Context);
236e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *Int32Array4Ty            = llvm::ArrayType::get(Int32Ty, 4);
237097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
238097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes    /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h:
239db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *
240e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * struct RsLaunchDimensions {
241e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t x;
242db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t y;
243db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     *   uint32_t z;
244e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t lod;
245e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t face;
246e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *   uint32_t array[4];
247e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * };
248e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     */
249e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes;
250e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t x
251e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t y
252e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t z
253e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t lod
254e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t face
255e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4]
256e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::StructType *RsLaunchDimensionsTy =
257e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions");
258e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
2591d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross    /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h:
260e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
261e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     * struct RsExpandKernelDriverInfoPfx {
262e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT];
263e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t inStride[RS_KERNEL_INPUT_LIMIT];
264e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t inLen;
265e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
266e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT];
267e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t outStride[RS_KERNEL_INPUT_LIMIT];
268e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t outLen;
269e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
270e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // Dimension of the launch
271e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     RsLaunchDimensions dim;
272e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
273e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // The walking iterator of the launch
274e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     RsLaunchDimensions current;
275e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
276e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     const void *usr;
277e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     uint32_t usrLen;
278e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *
279e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // Items below this line are not used by the compiler and can be change in the driver.
280e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // So the compiler must assume there are an unknown number of fields of unknown type
281e44a3525b9703739534c3b62d7d1af4c95649a38David Gross     *     // beginning here.
282db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     * };
2831d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross     *
2841d93a190e62ec1588b4724ca8759216b2d0b76d7David Gross     * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp).
285db169187dea4602e4ad32058762d23d474753fd0Stephen Hines     */
286e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes;
287e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]
288e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t inStride[RS_KERNEL_INPUT_LIMIT]
289e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t inLen
290e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]
291e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t outStride[RS_KERNEL_INPUT_LIMIT]
292e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t outLen
293e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions dim
294e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions current
295e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy);                // const void *usr
296e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t usrLen
297e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::StructType *RsExpandKernelDriverInfoPfxTy =
298e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx");
299bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
300bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    // Create the function type for expanded kernels.
3014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
302bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
303e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo();
3044e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // void (const RsExpandKernelDriverInfoPfxTy *p, uint32_t x1, uint32_t x2, uint32_t outstep)
3054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    ExpandedForEachType = llvm::FunctionType::get(VoidTy,
3064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        {RsExpandKernelDriverInfoPfxPtrTy, Int32Ty, Int32Ty, Int32Ty}, false);
307bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
3084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // void (void *inBuf, void *outBuf, uint32_t len)
3094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    ExpandedReduceType = llvm::FunctionType::get(VoidTy, {VoidPtrTy, VoidPtrTy, Int32Ty}, false);
3108ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  }
3118ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
3124e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  /// @brief Create skeleton of the expanded foreach kernel.
313357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///
314357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  /// This creates a function with the following signature:
315357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///
316357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
3175010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes  ///         uint32_t outstep)
318357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  ///
3194e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  llvm::Function *createEmptyExpandedForEachKernel(llvm::StringRef OldName) {
320bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
3214e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      llvm::Function::Create(ExpandedForEachType,
322bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                             llvm::GlobalValue::ExternalLinkage,
323bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                             OldName + ".expand", Module);
3244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
325bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
326bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("p");
327bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("x1");
328bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("x2");
329bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    (AI++)->setName("arg_outstep");
3304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
3314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                                                       ExpandedFunction);
3324e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::IRBuilder<> Builder(Begin);
3334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    Builder.CreateRetVoid();
3344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    return ExpandedFunction;
3354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  }
3364e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
3374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // Create skeleton of the expanded reduce kernel.
3384e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
3394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // This creates a function with the following signature:
3404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
3414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   void @func.expand(i8* nocapture %inBuf, i8* nocapture %outBuf, i32 len)
3424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
3434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  llvm::Function *createEmptyExpandedReduceKernel(llvm::StringRef OldName) {
3444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Function *ExpandedFunction =
3454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      llvm::Function::Create(ExpandedReduceType,
3464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                             llvm::GlobalValue::ExternalLinkage,
3474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                             OldName + ".expand", Module);
3484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedReduceParams);
3494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
3504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
3514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
3524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    using llvm::Attribute;
3534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
3544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Argument *InBuf = &(*AI++);
3554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    InBuf->setName("inBuf");
3564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    InBuf->addAttr(llvm::AttributeSet::get(*Context, InBuf->getArgNo() + 1, {Attribute::NoCapture}));
3574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
3584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Argument *OutBuf = &(*AI++);
3594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    OutBuf->setName("outBuf");
3604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    OutBuf->addAttr(llvm::AttributeSet::get(*Context, OutBuf->getArgNo() + 1, {Attribute::NoCapture}));
3614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
3624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    (AI++)->setName("len");
363bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
364bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
365bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes                                                       ExpandedFunction);
366806075b3a54af826fea78490fb213d8a0784138eTobias Grosser    llvm::IRBuilder<> Builder(Begin);
367806075b3a54af826fea78490fb213d8a0784138eTobias Grosser    Builder.CreateRetVoid();
368806075b3a54af826fea78490fb213d8a0784138eTobias Grosser
369bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    return ExpandedFunction;
370357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser  }
371357b58691936bef425bd315c13a2d8019d7e9c7eTobias Grosser
372e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @brief Create an empty loop
373e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
374e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// Create a loop of the form:
375e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
376e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// for (i = LowerBound; i < UpperBound; i++)
377e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///   ;
378e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
379e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// After the loop has been created, the builder is set such that
380e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// instructions can be added to the loop body.
381e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///
382e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param Builder The builder to use to build this loop. The current
383e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///                position of the builder is the position the loop
384e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  ///                will be inserted.
385e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param LowerBound The first value of the loop iterator
386e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param UpperBound The maximal value of the loop iterator
387e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @param LoopIV A reference that will be set to the loop iterator.
388e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  /// @return The BasicBlock that will be executed after the loop.
389e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
390e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::Value *LowerBound,
391e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::Value *UpperBound,
392e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser                               llvm::PHINode **LoopIV) {
393c2ca742d7d0197c52e49467862844463fb42280fDavid Gross    bccAssert(LowerBound->getType() == UpperBound->getType());
394e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
395e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
396e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    llvm::Value *Cond, *IVNext;
397e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    llvm::PHINode *IV;
398e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
399e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    CondBB = Builder.GetInsertBlock();
4001bd9f627fa0affb457507e86b0b6684c695fe726Stephen Hines    AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), nullptr, nullptr);
401bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
402e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
403e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // if (LowerBound < Upperbound)
404e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto LoopHeader
405e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // else
406e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto AfterBB
407e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    CondBB->getTerminator()->eraseFromParent();
408e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.SetInsertPoint(CondBB);
409e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser    Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
410e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
411e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
412e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
413e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // iv.next = iv + 1
414e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // if (iv.next < Upperbound)
415e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto LoopHeader
416e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    // else
417e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    //   goto AfterBB
418e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.SetInsertPoint(HeaderBB);
419e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
420e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IV->addIncoming(LowerBound, CondBB);
421e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
422e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    IV->addIncoming(IVNext, HeaderBB);
423e87a0518647d1f9c5249d6990c67737e0fb579e9Tobias Grosser    Cond = Builder.CreateICmpULT(IVNext, UpperBound);
424e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
425e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    AfterBB->setName("Exit");
426e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
427e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    *LoopIV = IV;
428e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser    return AfterBB;
429e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser  }
430e4a73f68e1b338881adf682c458e0b4b92ecd91eTobias Grosser
43128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Finish building the outgoing argument list for calling a ForEach-able function.
43228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //
43328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // ArgVector - on input, the non-special arguments
43428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //             on output, the non-special arguments combined with the special arguments
43528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //               from SpecialArgVector
43628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // SpecialArgVector - special arguments (from ExpandSpecialArguments())
43728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // SpecialArgContextIdx - return value of ExpandSpecialArguments()
43828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //                          (position of context argument in SpecialArgVector)
43928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // CalleeFunction - the ForEach-able function being called
44028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Builder - for inserting code into the caller function
44128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  template<unsigned int ArgVectorLen, unsigned int SpecialArgVectorLen>
44228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  void finishArgList(      llvm::SmallVector<llvm::Value *, ArgVectorLen>        &ArgVector,
44328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const llvm::SmallVector<llvm::Value *, SpecialArgVectorLen> &SpecialArgVector,
44428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const int SpecialArgContextIdx,
44528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     const llvm::Function &CalleeFunction,
44628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                     llvm::IRBuilder<> &CallerBuilder) {
44728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    /* The context argument (if any) is a pointer to an opaque user-visible type that differs from
44828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * the RsExpandKernelDriverInfoPfx type used in the function we are generating (although the
44928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * two types represent the same thing).  Therefore, we must introduce a pointer cast when
45028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     * generating a call to the kernel function.
45128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross     */
45228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    const int ArgContextIdx =
45328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        SpecialArgContextIdx >= 0 ? (ArgVector.size() + SpecialArgContextIdx) : SpecialArgContextIdx;
45428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    ArgVector.append(SpecialArgVector.begin(), SpecialArgVector.end());
45528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    if (ArgContextIdx >= 0) {
45628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      llvm::Type *ContextArgType = nullptr;
45728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      int ArgIdx = ArgContextIdx;
45828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      for (const auto &Arg : CalleeFunction.getArgumentList()) {
45928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        if (!ArgIdx--) {
46028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross          ContextArgType = Arg.getType();
46128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross          break;
46228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross        }
46328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      }
46428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      bccAssert(ContextArgType);
46528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      ArgVector[ArgContextIdx] = CallerBuilder.CreatePointerCast(ArgVector[ArgContextIdx], ContextArgType);
46628c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    }
46728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  }
46828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
469083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // GEPHelper() returns a SmallVector of values suitable for passing
470083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // to IRBuilder::CreateGEP(), and SmallGEPIndices is a typedef for
471083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // the returned data type. It is sized so that the SmallVector
472083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // returned by GEPHelper() never needs to do a heap allocation for
473083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // any list of GEP indices it encounters in the code.
474083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  typedef llvm::SmallVector<llvm::Value *, 3> SmallGEPIndices;
475083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
476083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Helper for turning a list of constant integer GEP indices into a
477083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // SmallVector of llvm::Value*. The return value is suitable for
478083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // passing to a GetElementPtrInst constructor or IRBuilder::CreateGEP().
479083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //
480083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Inputs:
481083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   I32Args should be integers which represent the index arguments
482083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   to a GEP instruction.
483083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //
484083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // Returns:
485083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //   Returns a SmallVector of ConstantInts.
4864e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  SmallGEPIndices GEPHelper(const std::initializer_list<int32_t> I32Args) {
487083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    SmallGEPIndices Out(I32Args.size());
488083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::IntegerType *I32Ty = llvm::Type::getInt32Ty(*Context);
489083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    std::transform(I32Args.begin(), I32Args.end(), Out.begin(),
490083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                   [I32Ty](int32_t Arg) { return llvm::ConstantInt::get(I32Ty, Arg); });
491083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    return Out;
492083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  }
493083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
4948ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosserpublic:
4954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  RSKernelExpandPass(bool pEnableStepOpt = true)
496900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes      : ModulePass(ID), Module(nullptr), Context(nullptr),
497bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes        mEnableStepOpt(pEnableStepOpt) {
498bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
4998ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  }
5008ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
501c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
502c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines    // This pass does not use any other analysis passes, but it does
503c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines    // add/wrap the existing functions in the module (thus altering the CFG).
504c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines  }
505c754d49ee856be620e041348a9f2b3d5610a5a26Stephen Hines
50633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Build contribution to outgoing argument list for calling a
50733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // ForEach-able function, based on the special parameters of that
50833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // function.
50933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  //
51033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Signature - metadata bits for the signature of the ForEach-able function
51133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // X, Arg_p - values derived directly from expanded function,
51233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  //            suitable for computing arguments for the ForEach-able function
51333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // CalleeArgs - contribution is accumulated here
51433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  // Bump - invoked once for each contributed outgoing argument
515083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  // LoopHeaderInsertionPoint - an Instruction in the loop header, before which
516083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala  //                            this function can insert loop-invariant loads
51728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  //
51828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // Return value is the (zero-based) position of the context (Arg_p)
51928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // argument in the CalleeArgs vector, or a negative value if the
52028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  // context argument is not placed in the CalleeArgs vector.
52128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross  int ExpandSpecialArguments(uint32_t Signature,
52228c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::Value *X,
52328c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::Value *Arg_p,
52428c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::IRBuilder<> &Builder,
52528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross                             llvm::SmallVector<llvm::Value*, 8> &CalleeArgs,
526083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             std::function<void ()> Bump,
527083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             llvm::Instruction *LoopHeaderInsertionPoint) {
52828c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
52928c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    bccAssert(CalleeArgs.empty());
53028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
53128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    int Return = -1;
53233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) {
53333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      CalleeArgs.push_back(Arg_p);
53433cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      Bump();
53528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross      Return = CalleeArgs.size() - 1;
53633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
53733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
53833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
53933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      CalleeArgs.push_back(X);
54033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross      Bump();
54133cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
54233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
543e44a3525b9703739534c3b62d7d1af4c95649a38David Gross    if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) ||
544e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
545083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      bccAssert(LoopHeaderInsertionPoint);
54633cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
547083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // Y and Z are loop invariant, so they can be hoisted out of the
548083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // loop. Set the IRBuilder insertion point to the loop header.
549083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      auto OldInsertionPoint = Builder.saveIP();
550083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.SetInsertPoint(LoopHeaderInsertionPoint);
551e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
552e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
553083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices YValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
554083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          RsLaunchDimensionsFieldY}));
555083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *YAddr = Builder.CreateInBoundsGEP(Arg_p, YValueGEP, "Y.gep");
556083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        CalleeArgs.push_back(Builder.CreateLoad(YAddr, "Y"));
557e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        Bump();
558e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      }
559e44a3525b9703739534c3b62d7d1af4c95649a38David Gross
560e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
561083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices ZValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
562083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          RsLaunchDimensionsFieldZ}));
563083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *ZAddr = Builder.CreateInBoundsGEP(Arg_p, ZValueGEP, "Z.gep");
564083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        CalleeArgs.push_back(Builder.CreateLoad(ZAddr, "Z"));
565e44a3525b9703739534c3b62d7d1af4c95649a38David Gross        Bump();
566e44a3525b9703739534c3b62d7d1af4c95649a38David Gross      }
567083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
568083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.restoreIP(OldInsertionPoint);
56933cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    }
57028c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross
57128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    return Return;
57233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross  }
57333cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
5748ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser  /* Performs the actual optimization on a selected function. On success, the
5758ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   * Module will contain a new function of the name "<NAME>.expand" that
5768ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   * invokes <NAME>() in a loop with the appropriate parameters.
5778ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser   */
5784e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  bool ExpandOldStyleForEach(llvm::Function *Function, uint32_t Signature) {
579bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    ALOGV("Expanding ForEach-able Function %s",
580bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes          Function->getName().str().c_str());
5818ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
5828ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser    if (!Signature) {
583bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      Signature = getRootSignature(Function);
5848ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser      if (!Signature) {
5858ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        // We couldn't determine how to expand this function based on its
5868ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        // function signature.
5878ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser        return false;
5888ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser      }
5898ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser    }
5908ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
591bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::DataLayout DL(Module);
5928ae4607d2dc5bc655d0c9225565d36ce2ebfc798Tobias Grosser
593bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
5944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      createEmptyExpandedForEachKernel(Function->getName());
595db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
/*
 * Extract the expanded function's parameters.  It is guaranteed by
 * createEmptyExpandedForEachKernel that there will be
 * kNumExpandedForeachParams parameters.
 */
60033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
6014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
60233cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross
603bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator ExpandedFunctionArgIter =
604bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      ExpandedFunction->arg_begin();
605db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
606bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
607bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
608bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
6095010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
610bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
611900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *InStep  = nullptr;
612900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutStep = nullptr;
613db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
614db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Construct the actual function body.
615bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
616db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
617cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Collect and construct the arguments for the kernel().
618db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    // Note that we load any loop-invariant arguments before entering the Loop.
619bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
620db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
621900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Type  *InTy      = nullptr;
622083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::Value *InBufPtr = nullptr;
623d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
624083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 0}));
625083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      llvm::LoadInst *InStepArg  = Builder.CreateLoad(
626083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"), "instep_addr");
627e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
628bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      InTy = (FunctionArgIter++)->getType();
629e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      InStep = getStepValue(&DL, InTy, InStepArg);
630e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
6312b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      InStep->setName("instep");
632e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes
633083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices InputAddrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 0}));
634083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      InBufPtr = Builder.CreateLoad(
635083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        Builder.CreateInBoundsGEP(Arg_p, InputAddrGEP, "input_buf.gep"), "input_buf");
636db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
637db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
638900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Type *OutTy = nullptr;
639900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutBasePtr = nullptr;
640d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
641bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      OutTy = (FunctionArgIter++)->getType();
642b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
6432b04086acbef6520ae2c54a868b1271abf053122Stephen Hines      OutStep->setName("outstep");
644083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
645083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
646db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
647db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
648900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *UsrData = nullptr;
649d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
650bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
651083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      llvm::Value *UsrDataPointerAddr = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldUsr);
652083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      UsrData = Builder.CreatePointerCast(Builder.CreateLoad(UsrDataPointerAddr), UsrDataTy);
653db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      UsrData->setName("UsrData");
654db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
655db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
656083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
65733cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    llvm::PHINode *IV;
65833cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    createLoop(Builder, Arg_x1, Arg_x2, &IV);
659097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
66033cda5cf335afc6aa2dbe02062bc9e6649e1f87cDavid Gross    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
66128c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    const int CalleeArgsContextIdx = ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
662083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                                                            [&FunctionArgIter]() { FunctionArgIter++; },
663083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                                                            LoopHeader->getTerminator());
664db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
665bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bccAssert(FunctionArgIter == Function->arg_end());
666db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
667cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    // Populate the actual call to kernel().
668db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    llvm::SmallVector<llvm::Value*, 8> RootArgs;
669db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
670900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *InPtr  = nullptr;
671900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutPtr = nullptr;
672db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
673ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    // Calculate the current input and output pointers
67402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    //
675ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    // We always calculate the input/output pointers with a GEP operating on i8
67602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // values and only cast at the very end to OutTy. This is because the step
67702f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // between two values is given in bytes.
67802f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    //
67902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // TODO: We could further optimize the output by using a GEP operation of
68002f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    // type 'OutTy' in cases where the element type of the allocation allows.
68102f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    if (OutBasePtr) {
68202f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
68302f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      OutOffset = Builder.CreateMul(OutOffset, OutStep);
684083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutPtr = Builder.CreateInBoundsGEP(OutBasePtr, OutOffset);
68502f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser      OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
68602f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    }
687bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
688083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (InBufPtr) {
689ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
690ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      InOffset = Builder.CreateMul(InOffset, InStep);
691083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      InPtr = Builder.CreateInBoundsGEP(InBufPtr, InOffset);
692ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser      InPtr = Builder.CreatePointerCast(InPtr, InTy);
693ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    }
69402f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser
695ae937ec9658c219a38d9d631d2569f19e21b50c0Tobias Grosser    if (InPtr) {
6967ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      RootArgs.push_back(InPtr);
697db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
698db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
69902f3cd69a4103f91a51a2f988f2179c885d734b5Tobias Grosser    if (OutPtr) {
7007ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines      RootArgs.push_back(OutPtr);
701db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
702db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
703db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    if (UsrData) {
704db169187dea4602e4ad32058762d23d474753fd0Stephen Hines      RootArgs.push_back(UsrData);
705db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
706db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
70728c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
708db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
709bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    Builder.CreateCall(Function, RootArgs);
710db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
7117ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    return true;
7127ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines  }
7137ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
7144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  /* Expand a pass-by-value foreach kernel.
7157ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines   */
7164e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  bool ExpandForEach(llvm::Function *Function, uint32_t Signature) {
717d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
718bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
7197ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
7204e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // TODO: Refactor this to share functionality with ExpandOldStyleForEach.
721bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::DataLayout DL(Module);
7227ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
723bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function *ExpandedFunction =
7244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      createEmptyExpandedForEachKernel(Function->getName());
7257ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
726bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    /*
727bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * Extract the expanded function's parameters.  It is guaranteed by
728bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     * createEmptyExpandedFunction that there will be five parameters.
729bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes     */
730881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
7314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
732881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
733bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Function::arg_iterator ExpandedFunctionArgIter =
734bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      ExpandedFunction->arg_begin();
735bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
736bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
737bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
738bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
7395010f641d1df6bc3447646ca7ef837410fb9b3dcChris Wailes    llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
7407ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
7417ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // Construct the actual function body.
742bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
7437ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
74418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Create TBAA meta-data.
745354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
746354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines                 *TBAAAllocation, *TBAAPointer;
747bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDBuilder MDHelper(*Context);
74814588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien
749354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScriptDistinct =
7504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      MDHelper.createTBAARoot(kRenderScriptTBAARootName);
7514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
752354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines        TBAARenderScriptDistinct);
753e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
754e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                       TBAARenderScript);
755e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
756e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                      TBAAAllocation, 0);
757e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
758e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes                                                    TBAARenderScript);
75914588cf0babf4596f1bcf4ea05ddd2ceb458a916Logan Chien    TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
76018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
761881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    /*
762881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     * Collect and construct the arguments for the kernel().
763881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     *
764881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     * Note that we load any loop-invariant arguments before entering the Loop.
765881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes     */
766083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    size_t NumRemainingInputs = Function->arg_size();
7677ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
768881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // No usrData parameter on kernels.
769881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    bccAssert(
770881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
771881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
772881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    llvm::Function::arg_iterator ArgIter = Function->arg_begin();
773881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
774881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Check the return type
775bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Type     *OutTy            = nullptr;
776bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Value    *OutStep          = nullptr;
777bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::LoadInst *OutBasePtr       = nullptr;
778bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::Value    *CastedOutBasePtr = nullptr;
779881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
780e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    bool PassOutByPointer = false;
781881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
782d88177580db4ddedf680854c51db333c97eabc59Stephen Hines    if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
783bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Type *OutBaseTy = Function->getReturnType();
784881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
78574a4b08235990916911b8fe758d656c1171faf26Stephen Hines      if (OutBaseTy->isVoidTy()) {
786e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes        PassOutByPointer = true;
787881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        OutTy = ArgIter->getType();
788881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
789881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        ArgIter++;
790083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        --NumRemainingInputs;
79174a4b08235990916911b8fe758d656c1171faf26Stephen Hines      } else {
79274a4b08235990916911b8fe758d656c1171faf26Stephen Hines        // We don't increment Args, since we are using the actual return type.
793881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        OutTy = OutBaseTy->getPointerTo();
79474a4b08235990916911b8fe758d656c1171faf26Stephen Hines      }
795881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
796b730e239619a546d93e5926ea92d698ab77ec7f6Stephen Hines      OutStep = getStepValue(&DL, OutTy, Arg_outstep);
79774a4b08235990916911b8fe758d656c1171faf26Stephen Hines      OutStep->setName("outstep");
798083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
799083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
800097ca14d8b6908d0e5b7f8f38011cad4fb94bc26Chris Wailes
8019c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      if (gEnableRsTbaa) {
8029c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines        OutBasePtr->setMetadata("tbaa", TBAAPointer);
8039c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
80450f5eb4b27ce6dd2a02cf389ac2acfa95c6939f1Tim Murray
805bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray      CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
80674a4b08235990916911b8fe758d656c1171faf26Stephen Hines    }
80774a4b08235990916911b8fe758d656c1171faf26Stephen Hines
808bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::SmallVector<llvm::Type*,  8> InTypes;
809bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    llvm::SmallVector<llvm::Value*, 8> InSteps;
810083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
811d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala    llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
812881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
813083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    bccAssert(NumRemainingInputs <= RS_KERNEL_INPUT_LIMIT);
814881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
815083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // Create the loop structure.
816083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
817083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::PHINode *IV;
818083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    createLoop(Builder, Arg_x1, Arg_x2, &IV);
819881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
820083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
821083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    const int CalleeArgsContextIdx =
822083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
823083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             [&NumRemainingInputs]() { --NumRemainingInputs; },
824083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                             LoopHeader->getTerminator());
825083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
826083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // After ExpandSpecialArguments() gets called, NumRemainingInputs
827083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // counts the number of arguments to the kernel that correspond to
828083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // an array entry from the InPtr field of the DriverInfo
829083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    // structure.
830083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    const size_t NumInPtrArguments = NumRemainingInputs;
831083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
832083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (NumInPtrArguments > 0) {
833083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // Extract information about input slots and step sizes. The work done
834083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      // here is loop-invariant, so we can hoist the operations out of the loop.
835083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      auto OldInsertionPoint = Builder.saveIP();
836083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.SetInsertPoint(LoopHeader->getTerminator());
837083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
838083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      for (size_t InputIndex = 0; InputIndex < NumInPtrArguments; ++InputIndex, ArgIter++) {
839083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride,
840083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          static_cast<int32_t>(InputIndex)}));
841083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *InStepAddr = Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep");
842083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::LoadInst *InStepArg = Builder.CreateLoad(InStepAddr, "instep_addr");
843881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
844d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        llvm::Type *InType = ArgIter->getType();
845326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes
846326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes        /*
847d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala         * AArch64 calling conventions dictate that structs of sufficient size
848d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala         * get passed by pointer instead of passed by value.  This, combined
849d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala         * with the fact that we don't allow kernels to operate on pointer
850d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala         * data means that if we see a kernel with a pointer parameter we know
851083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala         * that it is a struct input that has been promoted.  As such we don't
852d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala         * need to convert its type to a pointer.  Later we will need to know
853d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala         * to create a temporary copy on the stack, so we save this information
854d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala         * in InStructTempSlots.
855326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes         */
856d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) {
857d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala          llvm::Type *ElementType = PtrType->getElementType();
858083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr,
859083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala                                                           "input_struct_slot"));
860d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        } else {
861d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala          InType = InType->getPointerTo();
862d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala          InStructTempSlots.push_back(nullptr);
863d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        }
864326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes
865d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        llvm::Value *InStep = getStepValue(&DL, InType, InStepArg);
866881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
867d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        InStep->setName("instep");
868881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
869083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        SmallGEPIndices InBufPtrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr,
870083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          static_cast<int32_t>(InputIndex)}));
871083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value    *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep");
872083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf");
873083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value    *CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in");
874d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        if (gEnableRsTbaa) {
875083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala          InBufPtr->setMetadata("tbaa", TBAAPointer);
876d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        }
877881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
878d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        InTypes.push_back(InType);
879d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        InSteps.push_back(InStep);
880083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        InBufPtrs.push_back(CastInBufPtr);
881881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes      }
882083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala
883083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      Builder.restoreIP(OldInsertionPoint);
884881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    }
8857ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
8867ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    // Populate the actual call to kernel().
8877ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    llvm::SmallVector<llvm::Value*, 8> RootArgs;
8887ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
8899296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala    // Calculate the current input and output pointers.
890881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
891881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Output
892881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
893900c6c1f08f7c572125d7d39abe0f0f9eafbfa14Chris Wailes    llvm::Value *OutPtr = nullptr;
894bb73b74a9f6ad26c2ab30557bfe6916a44ed75f6Tim Murray    if (CastedOutBasePtr) {
8957b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser      llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
896083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset);
897bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
898e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes      if (PassOutByPointer) {
899881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        RootArgs.push_back(OutPtr);
900881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes      }
9014102bec56151fb5d9c962fb298412f34a6eacaa8Tobias Grosser    }
9027b6629024395bf085ca64fcd5c69d2ded95e8d18Tobias Grosser
903881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes    // Inputs
90474a4b08235990916911b8fe758d656c1171faf26Stephen Hines
905083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala    if (NumInPtrArguments > 0) {
906881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes      llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1);
907881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
908083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala      for (size_t Index = 0; Index < NumInPtrArguments; ++Index) {
909083ef3c3d1991e9fa443ff76065ec6528dd4730cMatt Wala        llvm::Value *InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset);
910326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes        llvm::Value *Input;
911326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes
9129296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala        llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
9139296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala
9149296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala        if (gEnableRsTbaa) {
9159296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala          InputLoad->setMetadata("tbaa", TBAAAllocation);
9169296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala        }
9179296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala
918d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala        if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) {
919d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala          // Pass a pointer to a temporary on the stack, rather than
920d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala          // passing a pointer to the original value. We do not want
921d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala          // the kernel to potentially modify the input data.
922d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala
9239296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala          // Note: don't annotate with TBAA, since the kernel might
9249296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala          // have its own TBAA annotations for the pointer argument.
9259296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala          Builder.CreateStore(InputLoad, TemporarySlot);
926d96c9fae6bcbaa7d8bee0dab2d75beb8400248ffMatt Wala          Input = TemporarySlot;
927326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes        } else {
928326d02a9f3cfe30caa21e5c2aecbd4c85112b363Chris Wailes          Input = InputLoad;
929881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        }
930881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes
931881cda4f5b793a1e7f7d33cf4a31efc3fdbba4fdChris Wailes        RootArgs.push_back(Input);
9329c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
9337ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
9347ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
93528c17995669687cecc96eaba232eecd3b65f5bbfDavid Gross    finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
9367ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
937bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
9387ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
939e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes    if (OutPtr && !PassOutByPointer) {
9409296edce10caec9c901f24b65e7d54c8ffe9131cMatt Wala      RetVal->setName("call.result");
94118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
9429c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      if (gEnableRsTbaa) {
9439c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines        Store->setMetadata("tbaa", TBAAAllocation);
9449c5263e00d89dfdd25d3a1706eb319e8529ec604Stephen Hines      }
9457ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines    }
9467ae3a828fade4e30f7940415949f33ac1f15dc77Stephen Hines
947db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    return true;
948db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
949db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
9504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // Expand a reduce-style kernel function.
9514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
9524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // The input is a kernel which represents a binary operation,
9534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // of the form
9544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
9554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   define foo @func(foo %a, foo %b),
9564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
9574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // (More generally, it can be of the forms
9584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
9594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   define void @func(foo* %ret, foo* %a, foo* %b)
9604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   define void @func(foo* %ret, foo1 %a, foo1 %b)
9614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   define foo1 @func(foo2 %a, foo2 %b)
9624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
9634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // as a result of argument / return value conversions. Here, "foo1"
9644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // and "foo2" refer to possibly coerced types, and the coerced
9654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // argument type may be different from the coerced return type. See
9664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // "Note on coercion" below.)
9674e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
9684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // Note also, we do not expect to encounter any case when the
9694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // arguments are promoted to pointers but the return value is
9704e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // not promoted to a pointer, e.g.
9714e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
9724e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   define foo1 @func(foo* %a, foo* %b)
9734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
9744e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // and we will throw an assertion in this case.
9754e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
9764e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // The input kernel gets expanded into a kernel of the form
9774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
9784e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   define void @func.expand(i8* %inBuf, i8* %outBuf, i32 %len)
9794e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
9804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // which performs a serial reduction of `len` elements from `inBuf`,
9814e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // and stores the result into `outBuf`. In pseudocode, @func.expand
9824e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // does:
9834e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
9844e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   inArr := (foo *)inBuf;
9854e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   accum := inArr[0];
9864e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   for (i := 1; i < len; ++i) {
9874e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //     accum := foo(accum, inArr[i]);
9884e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   }
9894e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   *(foo *)outBuf := accum;
9904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
9914e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // Note on coercion
9924e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
9934e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // Both the return value and the argument types may undergo internal
9944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // coercion in clang as part of call lowering. As a result, the
9954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // return value type may differ from the argument type even if the
9964e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // types in the RenderScript signature are the same. For instance, the
9974e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // kernel
9984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
9994e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   int3 add(int3 a, int3 b) { return a + b; }
10004e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // gets lowered by clang as
10024e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //   define <3 x i32> @add(<4 x i32> %a.coerce, <4 x i32> %b.coerce)
10044e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // under AArch64. The details of this process are found in clang,
10064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // lib/CodeGen/TargetInfo.cpp, under classifyArgumentType() and
10074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // classifyReturnType() in ARMABIInfo, AArch64ABIInfo. If the value
10084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // is passed by pointer, then the pointed-to type is not coerced.
10094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  //
10104e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // Since we lack the original type information, this code does loads
10114e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // and stores of allocation data by way of pointers to the coerced
10124e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  // type.
10134e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  bool ExpandReduce(llvm::Function *Function) {
10144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(Function);
10154e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
10164e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    ALOGV("Expanding reduce kernel %s", Function->getName().str().c_str());
10174e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
10184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::DataLayout DL(Module);
10194e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
10204e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // TBAA Metadata
10214e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript, *TBAAAllocation;
10224e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::MDBuilder MDHelper(*Context);
10234e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
10244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    TBAARenderScriptDistinct =
10254e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      MDHelper.createTBAARoot(kRenderScriptTBAARootName);
10264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
10274e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        TBAARenderScriptDistinct);
10284e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
10294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                                                       TBAARenderScript);
10304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
10314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                                                      TBAAAllocation, 0);
10324e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
10334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Function *ExpandedFunction =
10344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      createEmptyExpandedReduceKernel(Function->getName());
10354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
10364e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Extract the expanded kernel's parameters.  It is guaranteed by
10374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // createEmptyExpandedFunction that there will be 3 parameters.
10384e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    auto ExpandedFunctionArgIter = ExpandedFunction->arg_begin();
10394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
10404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *Arg_inBuf  = &*(ExpandedFunctionArgIter++);
10414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *Arg_outBuf = &*(ExpandedFunctionArgIter++);
10424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *Arg_len    = &*(ExpandedFunctionArgIter++);
10434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
10444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(Function->arg_size() == 2 || Function->arg_size() == 3);
10454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
10464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Check if, instead of returning a value, the original kernel has
10474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // a pointer parameter which points to a temporary buffer into
10484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // which the return value gets written.
10494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    const bool ReturnValuePointerStyle = (Function->arg_size() == 3);
10504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(Function->getReturnType()->isVoidTy() == ReturnValuePointerStyle);
10514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
10524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Check if, instead of being passed by value, the inputs to the
10534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // original kernel are passed by pointer.
10544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    auto FirstArgIter = Function->arg_begin();
10554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // The second argument is always an input to the original kernel.
10564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    auto SecondArgIter = std::next(FirstArgIter);
10574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    const bool InputsPointerStyle = SecondArgIter->getType()->isPointerTy();
10584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
10594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Get the output type (i.e. return type of the original kernel).
10604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::PointerType *OutPtrTy = nullptr;
10614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Type *OutTy = nullptr;
10624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (ReturnValuePointerStyle) {
10634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      OutPtrTy = llvm::dyn_cast<llvm::PointerType>(FirstArgIter->getType());
10644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      bccAssert(OutPtrTy && "Expected a pointer parameter to kernel");
10654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      OutTy = OutPtrTy->getElementType();
10664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    } else {
10674e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      OutTy = Function->getReturnType();
10684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      bccAssert(!OutTy->isVoidTy());
10694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      OutPtrTy = OutTy->getPointerTo();
10704e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
10714e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
10724e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Get the input type (type of the arguments to the original
10734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // kernel). Some input types are different from the output type,
10744e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // due to explicit coercion that the compiler performs when
10754e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // lowering the parameters. See "Note on coercion" above.
10764e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::PointerType *InPtrTy;
10774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Type *InTy;
10784e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (InputsPointerStyle) {
10794e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      InPtrTy = llvm::dyn_cast<llvm::PointerType>(SecondArgIter->getType());
10804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      bccAssert(InPtrTy && "Expected a pointer parameter to kernel");
10814e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      bccAssert(ReturnValuePointerStyle);
10824e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      bccAssert(std::next(SecondArgIter)->getType() == InPtrTy &&
10834e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                "Input type mismatch");
10844e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      InTy = InPtrTy->getElementType();
10854e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    } else {
10864e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      InTy = SecondArgIter->getType();
10874e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      InPtrTy = InTy->getPointerTo();
10884e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      if (!ReturnValuePointerStyle) {
10894e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        bccAssert(InTy == FirstArgIter->getType() && "Input type mismatch");
10904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      } else {
10914e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        bccAssert(InTy == std::next(SecondArgIter)->getType() &&
10924e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala                  "Input type mismatch");
10934e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      }
10944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
10954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
10964e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // The input type should take up the same amount of space in
10974e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // memory as the output type.
10984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    bccAssert(DL.getTypeAllocSize(InTy) == DL.getTypeAllocSize(OutTy));
10994e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11004e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Construct the actual function body.
11014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
11024e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Cast input and output buffers to appropriate types.
11044e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *InBuf = Builder.CreatePointerCast(Arg_inBuf, InPtrTy);
11054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *OutBuf = Builder.CreatePointerCast(Arg_outBuf, OutPtrTy);
11064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Create a slot to pass temporary results back. This needs to be
11084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // separate from the accumulator slot because the kernel may mark
11094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // the return value slot as noalias.
11104e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *ReturnBuf = nullptr;
11114e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (ReturnValuePointerStyle) {
11124e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      ReturnBuf = Builder.CreateAlloca(OutTy, nullptr, "ret.tmp");
11134e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
11144e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11154e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Create a slot to hold the second input if the inputs are passed
11164e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // by pointer to the original kernel. We cannot directly pass a
11174e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // pointer to the input buffer, because the kernel may modify its
11184e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // inputs.
11194e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *SecondInputTempBuf = nullptr;
11204e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (InputsPointerStyle) {
11214e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      SecondInputTempBuf = Builder.CreateAlloca(InTy, nullptr, "in.tmp");
11224e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
11234e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11244e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Create a slot to accumulate temporary results, and fill it with
11254e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // the first value.
11264e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *AccumBuf = Builder.CreateAlloca(OutTy, nullptr, "accum");
11274e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Cast to OutPtrTy before loading, since AccumBuf has type OutPtrTy.
11284e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::LoadInst *FirstElementLoad = Builder.CreateLoad(
11294e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      Builder.CreatePointerCast(InBuf, OutPtrTy));
11304e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (gEnableRsTbaa) {
11314e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      FirstElementLoad->setMetadata("tbaa", TBAAAllocation);
11324e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
11334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Memory operations with AccumBuf shouldn't be marked with
11344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // RenderScript TBAA, since this might conflict with TBAA metadata
11354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // in the kernel function when AccumBuf is passed by pointer.
11364e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    Builder.CreateStore(FirstElementLoad, AccumBuf);
11374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11384e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Loop body
11394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Create the loop structure. Note that the first input in the input buffer
11414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // has already been accumulated, so that we start at index 1.
11424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::PHINode *IndVar;
11434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *Start = llvm::ConstantInt::get(Arg_len->getType(), 1);
11444e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::BasicBlock *Exit = createLoop(Builder, Start, Arg_len, &IndVar);
11454e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11464e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *InputPtr = Builder.CreateInBoundsGEP(InBuf, IndVar, "next_input.gep");
11474e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11484e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Set up arguments and call the original (unexpanded) kernel.
11494e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    //
11504e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // The original kernel can have at most 3 arguments, which is
11514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // achieved when the signature looks like:
11524e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    //
11534e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    //    define void @func(foo* %ret, bar %a, bar %b)
11544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    //
11554e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // (bar can be one of foo/foo.coerce/foo*).
11564e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::SmallVector<llvm::Value *, 3> KernelArgs;
11574e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (ReturnValuePointerStyle) {
11594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      KernelArgs.push_back(ReturnBuf);
11604e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
11614e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11624e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (InputsPointerStyle) {
11634e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      bccAssert(ReturnValuePointerStyle);
11644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      // Because the return buffer is copied back into the
11654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      // accumulator, it's okay if the accumulator is overwritten.
11664e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      KernelArgs.push_back(AccumBuf);
11674e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11684e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      llvm::LoadInst *InputLoad = Builder.CreateLoad(InputPtr);
11694e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      if (gEnableRsTbaa) {
11704e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        InputLoad->setMetadata("tbaa", TBAAAllocation);
11714e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      }
11724e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      Builder.CreateStore(InputLoad, SecondInputTempBuf);
11734e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11744e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      KernelArgs.push_back(SecondInputTempBuf);
11754e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    } else {
11764e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      // InPtrTy may be different from OutPtrTy (the type of
11774e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      // AccumBuf), so first cast the accumulator buffer to the
11784e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      // pointer type corresponding to the input argument type.
11794e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      KernelArgs.push_back(
11804e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        Builder.CreateLoad(Builder.CreatePointerCast(AccumBuf, InPtrTy)));
11814e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11824e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      llvm::LoadInst *LoadedArg = Builder.CreateLoad(InputPtr);
11834e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      if (gEnableRsTbaa) {
11844e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        LoadedArg->setMetadata("tbaa", TBAAAllocation);
11854e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      }
11864e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      KernelArgs.push_back(LoadedArg);
11874e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
11884e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11894e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::Value *RetVal = Builder.CreateCall(Function, KernelArgs);
11904e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11914e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    const uint64_t ElementSize = DL.getTypeStoreSize(OutTy);
11924e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    const uint64_t ElementAlign = DL.getABITypeAlignment(OutTy);
11934e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
11944e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Store the output in the accumulator.
11954e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (ReturnValuePointerStyle) {
11964e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      Builder.CreateMemCpy(AccumBuf, ReturnBuf, ElementSize, ElementAlign);
11974e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    } else {
11984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      Builder.CreateStore(RetVal, AccumBuf);
11994e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
12004e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12014e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Loop exit
12024e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    Builder.SetInsertPoint(Exit, Exit->begin());
12034e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12044e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::LoadInst *OutputLoad = Builder.CreateLoad(AccumBuf);
12054e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    llvm::StoreInst *OutputStore = Builder.CreateStore(OutputLoad, OutBuf);
12064e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (gEnableRsTbaa) {
12074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      OutputStore->setMetadata("tbaa", TBAAAllocation);
12084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
12094e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
12104e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    return true;
12114e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  }
12124e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
121318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// @brief Checks if pointers to allocation internals are exposed
121418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  ///
121518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// This function verifies if through the parameters passed to the kernel
121618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// or through calls to the runtime library the script gains access to
121718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// pointers pointing to data within a RenderScript Allocation.
121818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// If we know we control all loads from and stores to data within
121918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// RenderScript allocations and if we know the run-time internal accesses
122018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// are all annotated with RenderScript TBAA metadata, only then we
122118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// can safely use TBAA to distinguish between generic and from-allocation
122218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// pointers.
1223bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  bool allocPointersExposed(llvm::Module &Module) {
122418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Old style kernel function can expose pointers to elements within
122518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // allocations.
122618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // TODO: Extend analysis to allow simple cases of old-style kernels.
122725eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    for (size_t i = 0; i < mExportForEachCount; ++i) {
122825eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      const char *Name = mExportForEachNameList[i];
122925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      uint32_t Signature = mExportForEachSignatureList[i];
1230bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (Module.getFunction(Name) &&
1231d88177580db4ddedf680854c51db333c97eabc59Stephen Hines          !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
123218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
123318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
123418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
123518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
123618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // Check for library functions that expose a pointer to an Allocation or
123718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    // that are not yet annotated with RenderScript-specific tbaa information.
1238e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    static const std::vector<const char *> Funcs{
1239e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAt(...)
1240e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationj",
1241e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationjj",
1242e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsGetElementAt13rs_allocationjjj",
1243e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1244e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsSetElementAt()
1245e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvj",
1246e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvjj",
1247e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z14rsSetElementAt13rs_allocationPvjjj",
1248e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1249e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_Y()
1250e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj",
1251e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1252e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_U()
1253e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj",
1254e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1255e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      // rsGetElementAtYuv_uchar_V()
1256e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      "_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj",
1257e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    };
1258e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala
1259e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala    for (auto FI : Funcs) {
1260e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala      llvm::Function *Function = Module.getFunction(FI);
126118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1262bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (!Function) {
1263e2423780bf482d2eeefa9a958b4793f865cba845Matt Wala        ALOGE("Missing run-time function '%s'", FI);
126418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
126518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
126618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1267bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      if (Function->getNumUses() > 0) {
126818a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser        return true;
126918a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser      }
127018a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
127118a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
127218a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    return false;
127318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  }
127418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
127518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
127618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  ///
127718a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  /// The TBAA metadata used to annotate loads/stores from RenderScript
1278e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// Allocations is generated in a separate TBAA tree with a
1279354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// "RenderScript Distinct TBAA" root node. LLVM does assume may-alias for
1280354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// all nodes in unrelated alias analysis trees. This function makes the
1281354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines  /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root),
1282e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// a subtree of the normal C/C++ TBAA tree aside of normal C/C++ types. With
1283e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// the connected trees every access to an Allocation is resolved to
1284e10b8641813c701159c6615faaa58dd1a9b31ce5Chris Wailes  /// must-alias if compared to a normal C/C++ access.
1285bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
1286bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDBuilder MDHelper(*Context);
1287354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScriptDistinct =
1288354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines      MDHelper.createTBAARoot("RenderScript Distinct TBAA");
1289354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode(
1290354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines        "RenderScript TBAA", TBAARenderScriptDistinct);
1291bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
1292354d1c132ad7e1ff6fdb0da95443245848a0601fStephen Hines    TBAARenderScript->replaceOperandWith(1, TBAARoot);
129318a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser  }
129418a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1295bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes  virtual bool runOnModule(llvm::Module &Module) {
1296bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bool Changed  = false;
1297bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    this->Module  = &Module;
12984e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    Context = &Module.getContext();
1299bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
13004e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    buildTypes();
1301bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes
1302bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes    bcinfo::MetadataExtractor me(&Module);
130325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    if (!me.extract()) {
130425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      ALOGE("Could not extract metadata from module!");
130525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      return false;
130625eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    }
13074e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
13084e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Expand forEach_* style kernels.
130925eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachCount = me.getExportForEachSignatureCount();
131025eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachNameList = me.getExportForEachNameList();
131125eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    mExportForEachSignatureList = me.getExportForEachSignatureList();
1312db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
131325eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines    for (size_t i = 0; i < mExportForEachCount; ++i) {
131425eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      const char *name = mExportForEachNameList[i];
131525eb586bb055ae07c7e82a2b1bdbd6936641580cStephen Hines      uint32_t signature = mExportForEachSignatureList[i];
1316bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      llvm::Function *kernel = Module.getFunction(name);
1317cd5b657c39a348bd7652a19c5be5035e9df44a42Tobias Grosser      if (kernel) {
1318d88177580db4ddedf680854c51db333c97eabc59Stephen Hines        if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
13194e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala          Changed |= ExpandForEach(kernel, signature);
1320acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1321acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        } else if (kernel->getReturnType()->isVoidTy()) {
13224e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala          Changed |= ExpandOldStyleForEach(kernel, signature);
1323acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1324acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        } else {
1325acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // There are some graphics root functions that are not
1326acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // expanded, but that will be called directly. For those
1327acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser          // functions, we can not set the linkage to internal.
1328acde6013e9c448547e59eed04afd2adbd9681a3aTobias Grosser        }
1329cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines      }
1330db169187dea4602e4ad32058762d23d474753fd0Stephen Hines    }
1331db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
13324e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    // Expand reduce_* style kernels.
13334e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    mExportReduceCount = me.getExportReduceCount();
13344e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    mExportReduceNameList = me.getExportReduceNameList();
13354e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
13364e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    for (size_t i = 0; i < mExportReduceCount; ++i) {
13374e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      llvm::Function *kernel = Module.getFunction(mExportReduceNameList[i]);
13384e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      if (kernel) {
13394e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala        Changed |= ExpandReduce(kernel);
13404e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala      }
13414e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    }
13424e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala
13434e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    if (gEnableRsTbaa && !allocPointersExposed(Module)) {
1344bdbff6e600b0d834e4770f65c7d2df93d7ef305cChris Wailes      connectRenderScriptTBAAMetadata(Module);
134518a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser    }
134618a38a3fc6fad8355891b771dd3c6537fa8699ecTobias Grosser
1347cc366e573e31f43a6101fd6e04b90c6afdc3b7a7Stephen Hines    return Changed;
1348db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1349db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1350db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  virtual const char *getPassName() const {
13514e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala    return "forEach_* and reduce_* function expansion";
1352db169187dea4602e4ad32058762d23d474753fd0Stephen Hines  }
1353db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
13544e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala}; // end RSKernelExpandPass
1355db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
13567a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end anonymous namespace
13577a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao
13584e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walachar RSKernelExpandPass::ID = 0;
13594e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Walastatic llvm::RegisterPass<RSKernelExpandPass> X("kernelexp", "Kernel Expand Pass");
1360db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
1361db169187dea4602e4ad32058762d23d474753fd0Stephen Hinesnamespace bcc {
1362db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
13637a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liaollvm::ModulePass *
13644e7a50685ae18a24087f6f2a51c604e71fab69e2Matt WalacreateRSKernelExpandPass(bool pEnableStepOpt) {
13654e7a50685ae18a24087f6f2a51c604e71fab69e2Matt Wala  return new RSKernelExpandPass(pEnableStepOpt);
13667a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao}
1367db169187dea4602e4ad32058762d23d474753fd0Stephen Hines
13687a66e6cbb1ae32cd56b19822c4e66560deb857dbShih-wei Liao} // end namespace bcc
1369