rsCpuCore.h revision 6760f7ba7934ddd51938a8d0206fc41c2a7cb419
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef RSD_CPU_CORE_H
18#define RSD_CPU_CORE_H
19
20#include "rsd_cpu.h"
21#include "rsSignal.h"
22#include "rsContext.h"
23#include "rsCppUtils.h"
24#include "rsElement.h"
25#include "rsScriptC.h"
26#include "rsCpuCoreRuntime.h"
27
28namespace android {
29namespace renderscript {
30
// True when the CPU this code is running on supports SIMD instructions.
// This is a declaration only (extern); the variable is defined elsewhere.
extern bool gArchUseSIMD;
33
34// Function types found in RenderScript code
35typedef void (*ReduceFunc_t)(const uint8_t *inBuf, uint8_t *outBuf, uint32_t len);
36typedef void (*ReduceNewAccumulatorFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum);
37typedef void (*ReduceNewCombinerFunc_t)(uint8_t *accum, const uint8_t *other);
38typedef void (*ReduceNewInitializerFunc_t)(uint8_t *accum);
39typedef void (*ReduceNewOutConverterFunc_t)(uint8_t *out, const uint8_t *accum);
40typedef void (*ForEachFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint32_t outStride);
41typedef void (*InvokeFunc_t)(void *params);
42typedef void (*InitOrDtorFunc_t)(void);
43typedef int  (*RootFunc_t)(void);
44
45struct ReduceNewDescription {
46    ReduceNewAccumulatorFunc_t  accumFunc;  // expanded accumulator function
47    ReduceNewInitializerFunc_t  initFunc;   // user initializer function
48    ReduceNewCombinerFunc_t     combFunc;   // user combiner function
49    ReduceNewOutConverterFunc_t outFunc;    // user outconverter function
50    size_t                      accumSize;  // accumulator datum size, in bytes
51};
52
// Callback the driver uses internally to execute a kernel.  It receives an
// opaque user pointer and an index.
using WorkerCallback_t = void (*)(void *usr, uint32_t idx);
55
// Forward declarations, so the structures below can hold pointers to these
// classes before their definitions have been seen.
class RsdCpuReferenceImpl;
class RsdCpuScriptImpl;
58
59struct ScriptTLSStruct {
60    android::renderscript::Context * mContext;
61    const android::renderscript::Script * mScript;
62    RsdCpuScriptImpl *mImpl;
63};
64
65// MTLaunchStruct passes information about a multithreaded kernel launch.
66struct MTLaunchStructCommon {
67    RsdCpuReferenceImpl *rs;
68    RsdCpuScriptImpl *script;
69
70    uint32_t mSliceSize;
71    volatile int mSliceNum;
72    bool isThreadable;
73
74    // Boundary information about the launch
75    RsLaunchDimensions start;
76    RsLaunchDimensions end;
77    // Points to MTLaunchStructForEach::fep::dim or
78    // MTLaunchStructReduce::inputDim or
79    // MTLaunchStructReduceNew::redp::dim.
80    RsLaunchDimensions *dimPtr;
81};
82
83struct MTLaunchStructForEach : public MTLaunchStructCommon {
84    // Driver info structure
85    RsExpandKernelDriverInfo fep;
86
87    ForEachFunc_t kernel;
88    const Allocation *ains[RS_KERNEL_INPUT_LIMIT];
89    Allocation *aout[RS_KERNEL_INPUT_LIMIT];
90};
91
92struct MTLaunchStructReduce : public MTLaunchStructCommon {
93    ReduceFunc_t kernel;
94    const uint8_t *inBuf;
95    uint8_t *outBuf;
96    RsLaunchDimensions inputDim;
97};
98
99struct MTLaunchStructReduceNew : public MTLaunchStructCommon {
100    // Driver info structure
101    RsExpandKernelDriverInfo redp;
102
103    const Allocation *ains[RS_KERNEL_INPUT_LIMIT];
104
105    ReduceNewAccumulatorFunc_t accumFunc;
106    ReduceNewInitializerFunc_t initFunc;
107    ReduceNewCombinerFunc_t combFunc;
108    ReduceNewOutConverterFunc_t outFunc;
109
110    size_t accumSize;  // accumulator datum size in bytes
111
112    size_t accumStride;  // stride between accumulators in accumAlloc (below)
113
114    // These fields are used for managing accumulator data items in a
115    // multithreaded execution.
116    //
117    // Let the number of threads be N.
118    // Let Outc be true iff there is an outconverter.
119    //
120    // accumAlloc is a pointer to a single allocation of (N - !Outc)
121    // accumulators.  (If there is no outconverter, then the output
122    // allocation acts as an accumulator.)  It is created at kernel
123    // launch time.  Within that allocation, the distance between the
124    // start of adjacent accumulators is accumStride bytes -- this
125    // might be the same as accumSize, or it might be larger, if we
126    // are attempting to avoid false sharing.
127    //
128    // accumCount is an atomic counter of how many accumulators have
129    // been grabbed by threads.  It is initialized to zero at kernel
130    // launch time.  See accumPtr for further description.
131    //
132    // accumPtr is pointer to an array of N pointers to accumulators.
133    // The array is created at kernel launch time, and each element is
134    // initialized to nullptr.  When a particular thread goes to work,
135    // that thread obtains its accumulator from its entry in this
136    // array.  If the entry is nullptr, that thread needs to obtain an
137    // accumulator, and initialize its entry in the array accordingly.
138    // It does so via atomic access (fetch-and-add) to accumCount.
139    // - If Outc, then the fetched value is used as an index into
140    //   accumAlloc.
141    // - If !Outc, then
142    //   - If the fetched value is zero, then this thread gets the
143    //     output allocation for its accumulator.
144    //   - If the fetched value is nonzero, then (fetched value - 1)
145    //     is used as an index into accumAlloc.
146    uint8_t *accumAlloc;
147    uint8_t **accumPtr;
148    uint32_t accumCount;
149
150    // Logging control
151    uint32_t logReduce;
152};
153
154class RsdCpuReferenceImpl : public RsdCpuReference {
155public:
156    ~RsdCpuReferenceImpl() override;
157    RsdCpuReferenceImpl(Context *);
158
159    void lockMutex();
160    void unlockMutex();
161
162    bool init(uint32_t version_major, uint32_t version_minor, sym_lookup_t, script_lookup_t);
163    void setPriority(int32_t priority) override;
164    virtual void launchThreads(WorkerCallback_t cbk, void *data);
165    static void * helperThreadProc(void *vrsc);
166    RsdCpuScriptImpl * setTLS(RsdCpuScriptImpl *sc);
167
168    Context * getContext() {return mRSC;}
169    uint32_t getThreadCount() const {
170        return mWorkers.mCount + 1;
171    }
172
173    // Launch foreach kernel
174    void launchForEach(const Allocation **ains, uint32_t inLen, Allocation *aout,
175                       const RsScriptCall *sc, MTLaunchStructForEach *mtls);
176
177    // Launch a simple reduce kernel
178    void launchReduce(const Allocation *ain, Allocation *aout,
179                      MTLaunchStructReduce *mtls);
180
181    // Launch a general reduce kernel
182    void launchReduceNew(const Allocation ** ains, uint32_t inLen, Allocation *aout,
183                         MTLaunchStructReduceNew *mtls);
184
185    CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir,
186                             uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) override;
187    CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) override;
188    void* createScriptGroup(const ScriptGroupBase *sg) override;
189
190    const RsdCpuReference::CpuSymbol *symLookup(const char *);
191
192    RsdCpuReference::CpuScript *lookupScript(const Script *s) {
193        return mScriptLookupFn(mRSC, s);
194    }
195
196    void setSelectRTCallback(RSSelectRTCallback pSelectRTCallback) {
197        mSelectRTCallback = pSelectRTCallback;
198    }
199    RSSelectRTCallback getSelectRTCallback() {
200        return mSelectRTCallback;
201    }
202
203    virtual void setBccPluginName(const char *name) {
204        mBccPluginName.setTo(name);
205    }
206    virtual const char *getBccPluginName() const {
207        return mBccPluginName.string();
208    }
209    bool getInKernel() override { return mInKernel; }
210
211    // Set to true if we should embed global variable information in the code.
212    void setEmbedGlobalInfo(bool v) override {
213        mEmbedGlobalInfo = v;
214    }
215
216    // Returns true if we should embed global variable information in the code.
217    bool getEmbedGlobalInfo() const override {
218        return mEmbedGlobalInfo;
219    }
220
221    // Set to true if we should skip constant (immutable) global variables when
222    // potentially embedding information about globals.
223    void setEmbedGlobalInfoSkipConstant(bool v) override {
224        mEmbedGlobalInfoSkipConstant = v;
225    }
226
227    // Returns true if we should skip constant (immutable) global variables when
228    // potentially embedding information about globals.
229    bool getEmbedGlobalInfoSkipConstant() const override {
230        return mEmbedGlobalInfoSkipConstant;
231    }
232
233protected:
234    Context *mRSC;
235    uint32_t version_major;
236    uint32_t version_minor;
237    //bool mHasGraphics;
238    bool mInKernel;  // Is a parallel kernel execution underway?
239
240    struct Workers {
241        volatile int mRunningCount;
242        volatile int mLaunchCount;
243        uint32_t mCount;
244        pthread_t *mThreadId;
245        pid_t *mNativeThreadId;
246        Signal mCompleteSignal;
247        Signal *mLaunchSignals;
248        WorkerCallback_t mLaunchCallback;
249        void *mLaunchData;
250    };
251    Workers mWorkers;
252    bool mExit;
253    sym_lookup_t mSymLookupFn;
254    script_lookup_t mScriptLookupFn;
255
256    ScriptTLSStruct mTlsStruct;
257
258    RSSelectRTCallback mSelectRTCallback;
259    String8 mBccPluginName;
260
261    // Specifies whether we should embed global variable information in the
262    // code via special RS variables that can be examined later by the driver.
263    // Defaults to true.
264    bool mEmbedGlobalInfo;
265
266    // Specifies whether we should skip constant (immutable) global variables
267    // when potentially embedding information about globals.
268    // Defaults to true.
269    bool mEmbedGlobalInfoSkipConstant;
270
271    long mPageSize;
272
273    // Launch a general reduce kernel
274    void launchReduceNewSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout,
275                               MTLaunchStructReduceNew *mtls);
276    void launchReduceNewParallel(const Allocation ** ains, uint32_t inLen, Allocation *aout,
277                                 MTLaunchStructReduceNew *mtls);
278};
279
280
281}
282}
283
284#endif
285