rsCpuCore.h revision 013ff53dc8d6c2464e1b5e1ea8c50b68f7b4aa73
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef RSD_CPU_CORE_H 18#define RSD_CPU_CORE_H 19 20#include "rsd_cpu.h" 21#include "rsSignal.h" 22#include "rsContext.h" 23#include "rsCppUtils.h" 24#include "rsElement.h" 25#include "rsScriptC.h" 26#include "rsCpuCoreRuntime.h" 27 28namespace android { 29namespace renderscript { 30 31// Whether the CPU we're running on supports SIMD instructions 32extern bool gArchUseSIMD; 33 34// Function types found in RenderScript code 35typedef void (*ReduceFunc_t)(const uint8_t *inBuf, uint8_t *outBuf, uint32_t len); 36typedef void (*ReduceNewAccumulatorFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum); 37typedef void (*ReduceNewCombinerFunc_t)(uint8_t *accum, const uint8_t *other); 38typedef void (*ReduceNewInitializerFunc_t)(uint8_t *accum); 39typedef void (*ReduceNewOutConverterFunc_t)(uint8_t *out, const uint8_t *accum); 40typedef void (*ForEachFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint32_t outStride); 41typedef void (*InvokeFunc_t)(void *params); 42typedef void (*InitOrDtorFunc_t)(void); 43typedef int (*RootFunc_t)(void); 44 45struct ReduceNewDescription { 46 ReduceNewAccumulatorFunc_t accumFunc; // expanded accumulator function 47 ReduceNewInitializerFunc_t initFunc; // user initializer function 48 ReduceNewCombinerFunc_t combFunc; // user combiner function 49 ReduceNewOutConverterFunc_t outFunc; // user outconverter function 50 size_t accumSize; // accumulator datum size, in bytes 51}; 52 53// Internal driver callback used to execute a kernel 54typedef void (*WorkerCallback_t)(void *usr, uint32_t idx); 55 56class RsdCpuScriptImpl; 57class RsdCpuReferenceImpl; 58 59struct ScriptTLSStruct { 60 android::renderscript::Context * mContext; 61 const android::renderscript::Script * mScript; 62 RsdCpuScriptImpl *mImpl; 63}; 64 65// MTLaunchStruct passes information about a multithreaded kernel launch. 66struct MTLaunchStructCommon { 67 RsdCpuReferenceImpl *rs; 68 RsdCpuScriptImpl *script; 69 70 uint32_t mSliceSize; 71 volatile int mSliceNum; 72 bool isThreadable; 73 74 // Boundary information about the launch 75 RsLaunchDimensions start; 76 RsLaunchDimensions end; 77 // Points to MTLaunchStructForEach::fep::dim or 78 // MTLaunchStructReduce::inputDim or 79 // MTLaunchStructReduceNew::redp::dim. 80 RsLaunchDimensions *dimPtr; 81}; 82 83struct MTLaunchStructForEach : public MTLaunchStructCommon { 84 // Driver info structure 85 RsExpandKernelDriverInfo fep; 86 87 ForEachFunc_t kernel; 88 const Allocation *ains[RS_KERNEL_INPUT_LIMIT]; 89 Allocation *aout[RS_KERNEL_INPUT_LIMIT]; 90}; 91 92struct MTLaunchStructReduce : public MTLaunchStructCommon { 93 ReduceFunc_t kernel; 94 const uint8_t *inBuf; 95 uint8_t *outBuf; 96 RsLaunchDimensions inputDim; 97}; 98 99struct MTLaunchStructReduceNew : public MTLaunchStructCommon { 100 // Driver info structure 101 RsExpandKernelDriverInfo redp; 102 103 const Allocation *ains[RS_KERNEL_INPUT_LIMIT]; 104 105 ReduceNewAccumulatorFunc_t accumFunc; 106 ReduceNewInitializerFunc_t initFunc; 107 ReduceNewCombinerFunc_t combFunc; 108 ReduceNewOutConverterFunc_t outFunc; 109 110 size_t accumSize; // accumulator datum size in bytes 111 112 size_t accumStride; // stride between accumulators in accumAlloc (below) 113 114 // These fields are used for managing accumulator data items in a 115 // multithreaded execution. 116 // 117 // Let the number of threads be N. 118 // Let Outc be true iff there is an outconverter. 119 // 120 // accumAlloc is a pointer to a single allocation of (N - !Outc) 121 // accumulators. (If there is no outconverter, then the output 122 // allocation acts as an accumulator.) It is created at kernel 123 // launch time. Within that allocation, the distance between the 124 // start of adjacent accumulators is accumStride bytes -- this 125 // might be the same as accumSize, or it might be larger, if we 126 // are attempting to avoid false sharing. 127 // 128 // accumCount is an atomic counter of how many accumulators have 129 // been grabbed by threads. It is initialized to zero at kernel 130 // launch time. See accumPtr for further description. 131 // 132 // accumPtr is pointer to an array of N pointers to accumulators. 133 // The array is created at kernel launch time, and each element is 134 // initialized to nullptr. When a particular thread goes to work, 135 // that thread obtains its accumulator from its entry in this 136 // array. If the entry is nullptr, that thread needs to obtain an 137 // accumulator, and initialize its entry in the array accordingly. 138 // It does so via atomic access (fetch-and-add) to accumCount. 139 // - If Outc, then the fetched value is used as an index into 140 // accumAlloc. 141 // - If !Outc, then 142 // - If the fetched value is zero, then this thread gets the 143 // output allocation for its accumulator. 144 // - If the fetched value is nonzero, then (fetched value - 1) 145 // is used as an index into accumAlloc. 146 uint8_t *accumAlloc; 147 uint8_t **accumPtr; 148 uint32_t accumCount; 149 150 // Logging control 151 uint32_t logReduce; 152}; 153 154class RsdCpuReferenceImpl : public RsdCpuReference { 155public: 156 ~RsdCpuReferenceImpl() override; 157 RsdCpuReferenceImpl(Context *); 158 159 void lockMutex(); 160 void unlockMutex(); 161 162 bool init(uint32_t version_major, uint32_t version_minor, sym_lookup_t, script_lookup_t); 163 void setPriority(int32_t priority) override; 164 virtual void launchThreads(WorkerCallback_t cbk, void *data); 165 static void * helperThreadProc(void *vrsc); 166 RsdCpuScriptImpl * setTLS(RsdCpuScriptImpl *sc); 167 168 Context * getContext() {return mRSC;} 169 uint32_t getThreadCount() const { 170 return mWorkers.mCount + 1; 171 } 172 173 // Launch foreach kernel 174 void launchForEach(const Allocation **ains, uint32_t inLen, Allocation *aout, 175 const RsScriptCall *sc, MTLaunchStructForEach *mtls); 176 177 // Launch a simple reduce kernel 178 void launchReduce(const Allocation *ain, Allocation *aout, 179 MTLaunchStructReduce *mtls); 180 181 // Launch a general reduce kernel 182 void launchReduceNew(const Allocation ** ains, uint32_t inLen, Allocation *aout, 183 MTLaunchStructReduceNew *mtls); 184 185 CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir, 186 uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) override; 187 CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) override; 188 void* createScriptGroup(const ScriptGroupBase *sg) override; 189 190 const RsdCpuReference::CpuSymbol *symLookup(const char *); 191 192 RsdCpuReference::CpuScript *lookupScript(const Script *s) { 193 return mScriptLookupFn(mRSC, s); 194 } 195 196 void setSelectRTCallback(RSSelectRTCallback pSelectRTCallback) { 197 mSelectRTCallback = pSelectRTCallback; 198 } 199 RSSelectRTCallback getSelectRTCallback() { 200 return mSelectRTCallback; 201 } 202 203 virtual void setBccPluginName(const char *name) { 204 mBccPluginName.setTo(name); 205 } 206 virtual const char *getBccPluginName() const { 207 return mBccPluginName.string(); 208 } 209 bool getInKernel() override { return mInKernel; } 210 211 // Set to true if we should embed global variable information in the code. 212 void setEmbedGlobalInfo(bool v) override { 213 mEmbedGlobalInfo = v; 214 } 215 216 // Returns true if we should embed global variable information in the code. 217 bool getEmbedGlobalInfo() const override { 218 return mEmbedGlobalInfo; 219 } 220 221 // Set to true if we should skip constant (immutable) global variables when 222 // potentially embedding information about globals. 223 void setEmbedGlobalInfoSkipConstant(bool v) override { 224 mEmbedGlobalInfoSkipConstant = v; 225 } 226 227 // Returns true if we should skip constant (immutable) global variables when 228 // potentially embedding information about globals. 229 bool getEmbedGlobalInfoSkipConstant() const override { 230 return mEmbedGlobalInfoSkipConstant; 231 } 232 233protected: 234 Context *mRSC; 235 uint32_t version_major; 236 uint32_t version_minor; 237 //bool mHasGraphics; 238 bool mInKernel; // Is a parallel kernel execution underway? 239 240 struct Workers { 241 volatile int mRunningCount; 242 volatile int mLaunchCount; 243 uint32_t mCount; 244 pthread_t *mThreadId; 245 pid_t *mNativeThreadId; 246 Signal mCompleteSignal; 247 Signal *mLaunchSignals; 248 WorkerCallback_t mLaunchCallback; 249 void *mLaunchData; 250 }; 251 Workers mWorkers; 252 bool mExit; 253 sym_lookup_t mSymLookupFn; 254 script_lookup_t mScriptLookupFn; 255 256 ScriptTLSStruct mTlsStruct; 257 258 RSSelectRTCallback mSelectRTCallback; 259 String8 mBccPluginName; 260 261 // Specifies whether we should embed global variable information in the 262 // code via special RS variables that can be examined later by the driver. 263 // Defaults to true. 264 bool mEmbedGlobalInfo; 265 266 // Specifies whether we should skip constant (immutable) global variables 267 // when potentially embedding information about globals. 268 // Defaults to true. 269 bool mEmbedGlobalInfoSkipConstant; 270 271 long mPageSize; 272 273 // Launch a general reduce kernel 274 void launchReduceNewSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout, 275 MTLaunchStructReduceNew *mtls); 276 void launchReduceNewParallel(const Allocation ** ains, uint32_t inLen, Allocation *aout, 277 MTLaunchStructReduceNew *mtls); 278}; 279 280 281} 282} 283 284#endif 285