1/* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "rsCpuIntrinsic.h" 18#include "rsCpuIntrinsicInlines.h" 19 20using namespace android; 21using namespace android::renderscript; 22 23namespace android { 24namespace renderscript { 25 26 27class RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic { 28public: 29 virtual void populateScript(Script *); 30 virtual void invokeFreeChildren(); 31 32 virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength); 33 virtual void setGlobalObj(uint32_t slot, ObjectBase *data); 34 35 virtual ~RsdCpuScriptIntrinsicHistogram(); 36 RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); 37 38protected: 39 void preLaunch(uint32_t slot, const Allocation * ain, 40 Allocation * aout, const void * usr, 41 uint32_t usrLen, const RsScriptCall *sc); 42 void postLaunch(uint32_t slot, const Allocation * ain, 43 Allocation * aout, const void * usr, 44 uint32_t usrLen, const RsScriptCall *sc); 45 46 47 float mDot[4]; 48 int mDotI[4]; 49 int *mSums; 50 ObjectBaseRef<Allocation> mAllocOut; 51 52 static void kernelP1U4(const RsForEachStubParamStruct *p, 53 uint32_t xstart, uint32_t xend, 54 uint32_t instep, uint32_t outstep); 55 static void kernelP1U3(const RsForEachStubParamStruct *p, 56 uint32_t xstart, uint32_t xend, 57 uint32_t instep, uint32_t outstep); 58 static void kernelP1U2(const RsForEachStubParamStruct *p, 59 uint32_t xstart, uint32_t xend, 60 uint32_t instep, uint32_t outstep); 61 static void kernelP1L(const RsForEachStubParamStruct *p, 62 uint32_t xstart, uint32_t xend, 63 uint32_t instep, uint32_t outstep); 64 static void kernelP1U1(const RsForEachStubParamStruct *p, 65 uint32_t xstart, uint32_t xend, 66 uint32_t instep, uint32_t outstep); 67 68}; 69 70} 71} 72 73void RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) { 74 rsAssert(slot == 1); 75 mAllocOut.set(static_cast<Allocation *>(data)); 76} 77 78void RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 79 rsAssert(slot == 0); 80 rsAssert(dataLength == 16); 81 memcpy(mDot, data, 16); 82 mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); 83 mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); 84 mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); 85 mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); 86} 87 88 89 90void RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, const Allocation * ain, 91 Allocation * aout, const void * usr, 92 uint32_t usrLen, const RsScriptCall *sc) { 93 94 const uint32_t threads = mCtx->getThreadCount(); 95 const uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize(); 96 97 switch (slot) { 98 case 0: 99 switch(mAllocOut->getType()->getElement()->getVectorSize()) { 100 case 1: 101 mRootPtr = &kernelP1U1; 102 break; 103 case 2: 104 mRootPtr = &kernelP1U2; 105 break; 106 case 3: 107 mRootPtr = &kernelP1U3; 108 break; 109 case 4: 110 mRootPtr = &kernelP1U4; 111 break; 112 } 113 break; 114 case 1: 115 mRootPtr = &kernelP1L; 116 break; 117 } 118 memset(mSums, 0, 256 * 4 * threads * vSize); 119} 120 121void RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, const Allocation * ain, 122 Allocation * aout, const void * usr, 123 uint32_t usrLen, const RsScriptCall *sc) { 124 125 unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr; 126 uint32_t threads = mCtx->getThreadCount(); 127 uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize(); 128 129 for (uint32_t ct=0; ct < (256 * vSize); ct++) { 130 o[ct] = mSums[ct]; 131 for (uint32_t t=1; t < threads; t++) { 132 o[ct] += mSums[ct + 256 * vSize]; 133 } 134 } 135} 136 137void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsForEachStubParamStruct *p, 138 uint32_t xstart, uint32_t xend, 139 uint32_t instep, uint32_t outstep) { 140 141 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 142 uchar *in = (uchar *)p->in; 143 int * sums = &cp->mSums[256 * 4 * p->lid]; 144 145 for (uint32_t x = xstart; x < xend; x++) { 146 sums[(in[0] << 2) ] ++; 147 sums[(in[1] << 2) + 1] ++; 148 sums[(in[2] << 2) + 2] ++; 149 sums[(in[3] << 2) + 3] ++; 150 in += 4; 151 } 152} 153 154void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsForEachStubParamStruct *p, 155 uint32_t xstart, uint32_t xend, 156 uint32_t instep, uint32_t outstep) { 157 158 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 159 uchar *in = (uchar *)p->in; 160 int * sums = &cp->mSums[256 * 4 * p->lid]; 161 162 for (uint32_t x = xstart; x < xend; x++) { 163 sums[(in[0] << 2) ] ++; 164 sums[(in[1] << 2) + 1] ++; 165 sums[(in[2] << 2) + 2] ++; 166 in += 4; 167 } 168} 169 170void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsForEachStubParamStruct *p, 171 uint32_t xstart, uint32_t xend, 172 uint32_t instep, uint32_t outstep) { 173 174 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 175 uchar *in = (uchar *)p->in; 176 int * sums = &cp->mSums[256 * 2 * p->lid]; 177 178 for (uint32_t x = xstart; x < xend; x++) { 179 sums[(in[0] << 2) ] ++; 180 sums[(in[1] << 2) + 1] ++; 181 in += 2; 182 } 183} 184 185void RsdCpuScriptIntrinsicHistogram::kernelP1L(const RsForEachStubParamStruct *p, 186 uint32_t xstart, uint32_t xend, 187 uint32_t instep, uint32_t outstep) { 188 189 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 190 uchar *in = (uchar *)p->in; 191 int * sums = &cp->mSums[256 * p->lid]; 192 193 for (uint32_t x = xstart; x < xend; x++) { 194 int t = (cp->mDotI[0] * in[0]) + 195 (cp->mDotI[1] * in[1]) + 196 (cp->mDotI[2] * in[2]) + 197 (cp->mDotI[3] * in[3]); 198 sums[t >> 8] ++; 199 in += 4; 200 } 201} 202 203void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsForEachStubParamStruct *p, 204 uint32_t xstart, uint32_t xend, 205 uint32_t instep, uint32_t outstep) { 206 207} 208 209 210RsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, 211 const Script *s, const Element *e) 212 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLUR) { 213 214 mRootPtr = NULL; 215 mSums = new int[256 * 4 * mCtx->getThreadCount()]; 216 mDot[0] = 0.299f; 217 mDot[1] = 0.587f; 218 mDot[2] = 0.114f; 219 mDot[3] = 0; 220 mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); 221 mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); 222 mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); 223 mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); 224} 225 226RsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() { 227 if (mSums) { 228 delete []mSums; 229 } 230} 231 232void RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) { 233 s->mHal.info.exportedVariableCount = 2; 234} 235 236void RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() { 237} 238 239 240RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { 241 242 return new RsdCpuScriptIntrinsicHistogram(ctx, s, e); 243} 244 245 246