1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef RSD_CPU_CORE_H
18#define RSD_CPU_CORE_H
19
20#include "rsd_cpu.h"
21#include "rsSignal.h"
22#include "rsContext.h"
23#include "rsCppUtils.h"
24#include "rsElement.h"
25#include "rsScriptC.h"
26#include "rsCpuCoreRuntime.h"
27
28namespace android {
29namespace renderscript {
30
31// Whether the CPU we're running on supports SIMD instructions
32extern bool gArchUseSIMD;
33
34// Function types found in RenderScript code
35typedef void (*ReduceAccumulatorFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum);
36typedef void (*ReduceCombinerFunc_t)(uint8_t *accum, const uint8_t *other);
37typedef void (*ReduceInitializerFunc_t)(uint8_t *accum);
38typedef void (*ReduceOutConverterFunc_t)(uint8_t *out, const uint8_t *accum);
39typedef void (*ForEachFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint32_t outStride);
40typedef void (*InvokeFunc_t)(void *params);
41typedef void (*InitOrDtorFunc_t)(void);
42typedef int  (*RootFunc_t)(void);
43
44struct ReduceDescription {
45    ReduceAccumulatorFunc_t  accumFunc;  // expanded accumulator function
46    ReduceInitializerFunc_t  initFunc;   // user initializer function
47    ReduceCombinerFunc_t     combFunc;   // user combiner function
48    ReduceOutConverterFunc_t outFunc;    // user outconverter function
49    size_t                   accumSize;  // accumulator datum size, in bytes
50};
51
// Callback the driver uses internally to execute a kernel: it receives an
// opaque user pointer and an index.
using WorkerCallback_t = void (*)(void *usr, uint32_t idx);

// Forward declarations; the structures below only hold pointers to these.
class RsdCpuScriptImpl;
class RsdCpuReferenceImpl;
57
58struct ScriptTLSStruct {
59    android::renderscript::Context * mContext;
60    const android::renderscript::Script * mScript;
61    RsdCpuScriptImpl *mImpl;
62};
63
64// MTLaunchStruct passes information about a multithreaded kernel launch.
65struct MTLaunchStructCommon {
66    RsdCpuReferenceImpl *rs;
67    RsdCpuScriptImpl *script;
68
69    uint32_t mSliceSize;
70    volatile int mSliceNum;
71    bool isThreadable;
72
73    // Boundary information about the launch
74    RsLaunchDimensions start;
75    RsLaunchDimensions end;
76    // Points to MTLaunchStructForEach::fep::dim or
77    // MTLaunchStructReduce::redp::dim.
78    RsLaunchDimensions *dimPtr;
79};
80
81struct MTLaunchStructForEach : public MTLaunchStructCommon {
82    // Driver info structure
83    RsExpandKernelDriverInfo fep;
84
85    ForEachFunc_t kernel;
86    const Allocation *ains[RS_KERNEL_INPUT_LIMIT];
87    Allocation *aout[RS_KERNEL_INPUT_LIMIT];
88};
89
90struct MTLaunchStructReduce : public MTLaunchStructCommon {
91    // Driver info structure
92    RsExpandKernelDriverInfo redp;
93
94    const Allocation *ains[RS_KERNEL_INPUT_LIMIT];
95
96    ReduceAccumulatorFunc_t accumFunc;
97    ReduceInitializerFunc_t initFunc;
98    ReduceCombinerFunc_t combFunc;
99    ReduceOutConverterFunc_t outFunc;
100
101    size_t accumSize;  // accumulator datum size in bytes
102
103    size_t accumStride;  // stride between accumulators in accumAlloc (below)
104
105    // These fields are used for managing accumulator data items in a
106    // multithreaded execution.
107    //
108    // Let the number of threads be N.
109    // Let Outc be true iff there is an outconverter.
110    //
111    // accumAlloc is a pointer to a single allocation of (N - !Outc)
112    // accumulators.  (If there is no outconverter, then the output
113    // allocation acts as an accumulator.)  It is created at kernel
114    // launch time.  Within that allocation, the distance between the
115    // start of adjacent accumulators is accumStride bytes -- this
116    // might be the same as accumSize, or it might be larger, if we
117    // are attempting to avoid false sharing.
118    //
119    // accumCount is an atomic counter of how many accumulators have
120    // been grabbed by threads.  It is initialized to zero at kernel
121    // launch time.  See accumPtr for further description.
122    //
123    // accumPtr is pointer to an array of N pointers to accumulators.
124    // The array is created at kernel launch time, and each element is
125    // initialized to nullptr.  When a particular thread goes to work,
126    // that thread obtains its accumulator from its entry in this
127    // array.  If the entry is nullptr, that thread needs to obtain an
128    // accumulator, and initialize its entry in the array accordingly.
129    // It does so via atomic access (fetch-and-add) to accumCount.
130    // - If Outc, then the fetched value is used as an index into
131    //   accumAlloc.
132    // - If !Outc, then
133    //   - If the fetched value is zero, then this thread gets the
134    //     output allocation for its accumulator.
135    //   - If the fetched value is nonzero, then (fetched value - 1)
136    //     is used as an index into accumAlloc.
137    uint8_t *accumAlloc;
138    uint8_t **accumPtr;
139    uint32_t accumCount;
140
141    // Logging control
142    uint32_t logReduce;
143};
144
145class RsdCpuReferenceImpl : public RsdCpuReference {
146public:
147    ~RsdCpuReferenceImpl() override;
148    RsdCpuReferenceImpl(Context *);
149
150    void lockMutex();
151    void unlockMutex();
152
153    bool init(uint32_t version_major, uint32_t version_minor, sym_lookup_t, script_lookup_t);
154    void setPriority(int32_t priority) override;
155    virtual void launchThreads(WorkerCallback_t cbk, void *data);
156    static void * helperThreadProc(void *vrsc);
157    RsdCpuScriptImpl * setTLS(RsdCpuScriptImpl *sc);
158
159    Context * getContext() {return mRSC;}
160    uint32_t getThreadCount() const {
161        return mWorkers.mCount + 1;
162    }
163
164    // Launch foreach kernel
165    void launchForEach(const Allocation **ains, uint32_t inLen, Allocation *aout,
166                       const RsScriptCall *sc, MTLaunchStructForEach *mtls);
167
168    // Launch a general reduce kernel
169    void launchReduce(const Allocation ** ains, uint32_t inLen, Allocation *aout,
170                      MTLaunchStructReduce *mtls);
171
172    CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir,
173                             uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) override;
174    CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) override;
175    void* createScriptGroup(const ScriptGroupBase *sg) override;
176
177    const RsdCpuReference::CpuSymbol *symLookup(const char *);
178
179    RsdCpuReference::CpuScript *lookupScript(const Script *s) {
180        return mScriptLookupFn(mRSC, s);
181    }
182
183    void setSelectRTCallback(RSSelectRTCallback pSelectRTCallback) {
184        mSelectRTCallback = pSelectRTCallback;
185    }
186    RSSelectRTCallback getSelectRTCallback() {
187        return mSelectRTCallback;
188    }
189
190    virtual void setBccPluginName(const char *name) {
191        mBccPluginName.setTo(name);
192    }
193    virtual const char *getBccPluginName() const {
194        return mBccPluginName.string();
195    }
196    bool getInKernel() override { return mInKernel; }
197
198    // Set to true if we should embed global variable information in the code.
199    void setEmbedGlobalInfo(bool v) override {
200        mEmbedGlobalInfo = v;
201    }
202
203    // Returns true if we should embed global variable information in the code.
204    bool getEmbedGlobalInfo() const override {
205        return mEmbedGlobalInfo;
206    }
207
208    // Set to true if we should skip constant (immutable) global variables when
209    // potentially embedding information about globals.
210    void setEmbedGlobalInfoSkipConstant(bool v) override {
211        mEmbedGlobalInfoSkipConstant = v;
212    }
213
214    // Returns true if we should skip constant (immutable) global variables when
215    // potentially embedding information about globals.
216    bool getEmbedGlobalInfoSkipConstant() const override {
217        return mEmbedGlobalInfoSkipConstant;
218    }
219
220protected:
221    Context *mRSC;
222    uint32_t version_major;
223    uint32_t version_minor;
224    //bool mHasGraphics;
225    bool mInKernel;  // Is a parallel kernel execution underway?
226
227    struct Workers {
228        volatile int mRunningCount;
229        volatile int mLaunchCount;
230        uint32_t mCount;
231        pthread_t *mThreadId;
232        pid_t *mNativeThreadId;
233        Signal mCompleteSignal;
234        Signal *mLaunchSignals;
235        WorkerCallback_t mLaunchCallback;
236        void *mLaunchData;
237    };
238    Workers mWorkers;
239    bool mExit;
240    sym_lookup_t mSymLookupFn;
241    script_lookup_t mScriptLookupFn;
242
243    ScriptTLSStruct mTlsStruct;
244
245    RSSelectRTCallback mSelectRTCallback;
246    String8 mBccPluginName;
247
248    // Specifies whether we should embed global variable information in the
249    // code via special RS variables that can be examined later by the driver.
250    // Defaults to true.
251    bool mEmbedGlobalInfo;
252
253    // Specifies whether we should skip constant (immutable) global variables
254    // when potentially embedding information about globals.
255    // Defaults to true.
256    bool mEmbedGlobalInfoSkipConstant;
257
258    long mPageSize;
259
260    // Launch a general reduce kernel
261    void launchReduceSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout,
262                            MTLaunchStructReduce *mtls);
263    void launchReduceParallel(const Allocation ** ains, uint32_t inLen, Allocation *aout,
264                              MTLaunchStructReduce *mtls);
265};
266
267
268}
269}
270
271#endif
272