1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef RSD_CPU_CORE_H
18#define RSD_CPU_CORE_H
19
20#include "rsd_cpu.h"
21#include "rsSignal.h"
22#include "rsContext.h"
23#include "rsCppUtils.h"
24#include "rsElement.h"
25#include "rsScriptC.h"
26#include "rsCpuCoreRuntime.h"
27
28#include <string>
29
30namespace android {
31namespace renderscript {
32
33// Whether the CPU we're running on supports SIMD instructions
34extern bool gArchUseSIMD;
35
36// Function types found in RenderScript code
37typedef void (*ReduceAccumulatorFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum);
38typedef void (*ReduceCombinerFunc_t)(uint8_t *accum, const uint8_t *other);
39typedef void (*ReduceInitializerFunc_t)(uint8_t *accum);
40typedef void (*ReduceOutConverterFunc_t)(uint8_t *out, const uint8_t *accum);
41typedef void (*ForEachFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint32_t outStride);
42typedef void (*InvokeFunc_t)(void *params);
43typedef void (*InitOrDtorFunc_t)(void);
44typedef int  (*RootFunc_t)(void);
45
46struct ReduceDescription {
47    ReduceAccumulatorFunc_t  accumFunc;  // expanded accumulator function
48    ReduceInitializerFunc_t  initFunc;   // user initializer function
49    ReduceCombinerFunc_t     combFunc;   // user combiner function
50    ReduceOutConverterFunc_t outFunc;    // user outconverter function
51    size_t                   accumSize;  // accumulator datum size, in bytes
52};
53
// Callback type the driver uses internally to execute a kernel.  It is
// given an opaque user pointer and a 32-bit index.
using WorkerCallback_t = void (*)(void *usr, uint32_t idx);

// Forward declarations; the MT launch structures below only hold pointers
// to these classes.
class RsdCpuScriptImpl;
class RsdCpuReferenceImpl;
59
60struct ScriptTLSStruct {
61    android::renderscript::Context * mContext;
62    const android::renderscript::Script * mScript;
63    RsdCpuScriptImpl *mImpl;
64};
65
66// MTLaunchStruct passes information about a multithreaded kernel launch.
67struct MTLaunchStructCommon {
68    RsdCpuReferenceImpl *rs;
69    RsdCpuScriptImpl *script;
70
71    uint32_t mSliceSize;
72    volatile int mSliceNum;
73    bool isThreadable;
74
75    // Boundary information about the launch
76    RsLaunchDimensions start;
77    RsLaunchDimensions end;
78    // Points to MTLaunchStructForEach::fep::dim or
79    // MTLaunchStructReduce::redp::dim.
80    RsLaunchDimensions *dimPtr;
81};
82
83struct MTLaunchStructForEach : public MTLaunchStructCommon {
84    // Driver info structure
85    RsExpandKernelDriverInfo fep;
86
87    ForEachFunc_t kernel;
88    const Allocation *ains[RS_KERNEL_INPUT_LIMIT];
89    Allocation *aout[RS_KERNEL_INPUT_LIMIT];
90};
91
92struct MTLaunchStructReduce : public MTLaunchStructCommon {
93    // Driver info structure
94    RsExpandKernelDriverInfo redp;
95
96    const Allocation *ains[RS_KERNEL_INPUT_LIMIT];
97
98    ReduceAccumulatorFunc_t accumFunc;
99    ReduceInitializerFunc_t initFunc;
100    ReduceCombinerFunc_t combFunc;
101    ReduceOutConverterFunc_t outFunc;
102
103    size_t accumSize;  // accumulator datum size in bytes
104
105    size_t accumStride;  // stride between accumulators in accumAlloc (below)
106
107    // These fields are used for managing accumulator data items in a
108    // multithreaded execution.
109    //
110    // Let the number of threads be N.
111    // Let Outc be true iff there is an outconverter.
112    //
113    // accumAlloc is a pointer to a single allocation of (N - !Outc)
114    // accumulators.  (If there is no outconverter, then the output
115    // allocation acts as an accumulator.)  It is created at kernel
116    // launch time.  Within that allocation, the distance between the
117    // start of adjacent accumulators is accumStride bytes -- this
118    // might be the same as accumSize, or it might be larger, if we
119    // are attempting to avoid false sharing.
120    //
121    // accumCount is an atomic counter of how many accumulators have
122    // been grabbed by threads.  It is initialized to zero at kernel
123    // launch time.  See accumPtr for further description.
124    //
125    // accumPtr is pointer to an array of N pointers to accumulators.
126    // The array is created at kernel launch time, and each element is
127    // initialized to nullptr.  When a particular thread goes to work,
128    // that thread obtains its accumulator from its entry in this
129    // array.  If the entry is nullptr, that thread needs to obtain an
130    // accumulator, and initialize its entry in the array accordingly.
131    // It does so via atomic access (fetch-and-add) to accumCount.
132    // - If Outc, then the fetched value is used as an index into
133    //   accumAlloc.
134    // - If !Outc, then
135    //   - If the fetched value is zero, then this thread gets the
136    //     output allocation for its accumulator.
137    //   - If the fetched value is nonzero, then (fetched value - 1)
138    //     is used as an index into accumAlloc.
139    uint8_t *accumAlloc;
140    uint8_t **accumPtr;
141    uint32_t accumCount;
142
143    // Logging control
144    uint32_t logReduce;
145};
146
147class RsdCpuReferenceImpl : public RsdCpuReference {
148public:
149    ~RsdCpuReferenceImpl() override;
150    explicit RsdCpuReferenceImpl(Context *);
151
152    void lockMutex();
153    void unlockMutex();
154
155    bool init(uint32_t version_major, uint32_t version_minor, sym_lookup_t, script_lookup_t);
156    void setPriority(int32_t priority) override;
157    virtual void launchThreads(WorkerCallback_t cbk, void *data);
158    static void * helperThreadProc(void *vrsc);
159    RsdCpuScriptImpl * setTLS(RsdCpuScriptImpl *sc);
160
161    Context * getContext() {return mRSC;}
162    uint32_t getThreadCount() const {
163        return mWorkers.mCount + 1;
164    }
165
166    // Launch foreach kernel
167    void launchForEach(const Allocation **ains, uint32_t inLen, Allocation *aout,
168                       const RsScriptCall *sc, MTLaunchStructForEach *mtls);
169
170    // Launch a general reduce kernel
171    void launchReduce(const Allocation ** ains, uint32_t inLen, Allocation *aout,
172                      MTLaunchStructReduce *mtls);
173
174    CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir,
175                             uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) override;
176    CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) override;
177    void* createScriptGroup(const ScriptGroupBase *sg) override;
178
179    const RsdCpuReference::CpuSymbol *symLookup(const char *);
180
181    RsdCpuReference::CpuScript *lookupScript(const Script *s) {
182        return mScriptLookupFn(mRSC, s);
183    }
184
185    void setSelectRTCallback(RSSelectRTCallback pSelectRTCallback) {
186        mSelectRTCallback = pSelectRTCallback;
187    }
188    RSSelectRTCallback getSelectRTCallback() {
189        return mSelectRTCallback;
190    }
191
192    virtual void setBccPluginName(const char *name) {
193        mBccPluginName.assign(name);
194    }
195    virtual const char *getBccPluginName() const {
196        return mBccPluginName.c_str();
197    }
198    bool getInKernel() override { return mInKernel; }
199
200    // Set to true if we should embed global variable information in the code.
201    void setEmbedGlobalInfo(bool v) override {
202        mEmbedGlobalInfo = v;
203    }
204
205    // Returns true if we should embed global variable information in the code.
206    bool getEmbedGlobalInfo() const override {
207        return mEmbedGlobalInfo;
208    }
209
210    // Set to true if we should skip constant (immutable) global variables when
211    // potentially embedding information about globals.
212    void setEmbedGlobalInfoSkipConstant(bool v) override {
213        mEmbedGlobalInfoSkipConstant = v;
214    }
215
216    // Returns true if we should skip constant (immutable) global variables when
217    // potentially embedding information about globals.
218    bool getEmbedGlobalInfoSkipConstant() const override {
219        return mEmbedGlobalInfoSkipConstant;
220    }
221
222protected:
223    Context *mRSC;
224    uint32_t version_major;
225    uint32_t version_minor;
226    //bool mHasGraphics;
227    bool mInKernel;  // Is a parallel kernel execution underway?
228
229    struct Workers {
230        volatile int mRunningCount;
231        volatile int mLaunchCount;
232        uint32_t mCount;
233        pthread_t *mThreadId;
234        pid_t *mNativeThreadId;
235        Signal mCompleteSignal;
236        Signal *mLaunchSignals;
237        WorkerCallback_t mLaunchCallback;
238        void *mLaunchData;
239    };
240    Workers mWorkers;
241    bool mExit;
242    sym_lookup_t mSymLookupFn;
243    script_lookup_t mScriptLookupFn;
244
245    ScriptTLSStruct mTlsStruct;
246
247    RSSelectRTCallback mSelectRTCallback;
248    std::string mBccPluginName;
249
250    // Specifies whether we should embed global variable information in the
251    // code via special RS variables that can be examined later by the driver.
252    // Defaults to true.
253    bool mEmbedGlobalInfo;
254
255    // Specifies whether we should skip constant (immutable) global variables
256    // when potentially embedding information about globals.
257    // Defaults to true.
258    bool mEmbedGlobalInfoSkipConstant;
259
260    long mPageSize;
261
262    // Launch a general reduce kernel
263    void launchReduceSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout,
264                            MTLaunchStructReduce *mtls);
265    void launchReduceParallel(const Allocation ** ains, uint32_t inLen, Allocation *aout,
266                              MTLaunchStructReduce *mtls);
267};
268
269
270} // namespace renderscript
271} // namespace android
272
273#endif
274