1/* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18#include "rsCpuIntrinsic.h" 19#include "rsCpuIntrinsicInlines.h" 20 21using namespace android; 22using namespace android::renderscript; 23 24namespace android { 25namespace renderscript { 26 27 28class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic { 29public: 30 virtual void populateScript(Script *); 31 virtual void invokeFreeChildren(); 32 33 virtual void setGlobalObj(uint32_t slot, ObjectBase *data); 34 35 virtual ~RsdCpuScriptIntrinsicResize(); 36 RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *); 37 38 virtual void preLaunch(uint32_t slot, const Allocation * ain, 39 Allocation * aout, const void * usr, 40 uint32_t usrLen, const RsScriptCall *sc); 41 42 float scaleX; 43 float scaleY; 44 45protected: 46 ObjectBaseRef<const Allocation> mAlloc; 47 ObjectBaseRef<const Element> mElement; 48 49 static void kernelU1(const RsForEachStubParamStruct *p, 50 uint32_t xstart, uint32_t xend, 51 uint32_t instep, uint32_t outstep); 52 static void kernelU2(const RsForEachStubParamStruct *p, 53 uint32_t xstart, uint32_t xend, 54 uint32_t instep, uint32_t outstep); 55 static void kernelU4(const RsForEachStubParamStruct *p, 56 uint32_t xstart, uint32_t xend, 57 uint32_t instep, uint32_t outstep); 58}; 59 60} 61} 62 63 64void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) { 65 rsAssert(slot == 0); 66 mAlloc.set(static_cast<Allocation *>(data)); 67} 68 69static float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) { 70 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 71 + x * (3.f * (p1 - p2) + p3 - p0))); 72} 73 74static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) { 75 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 76 + x * (3.f * (p1 - p2) + p3 - p0))); 77} 78 79static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) { 80 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 81 + x * (3.f * (p1 - p2) + p3 - p0))); 82} 83 84static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3, 85 float xf, float yf, int width) { 86 int startx = (int) floor(xf - 1); 87 xf = xf - floor(xf); 88 int maxx = width - 1; 89 int xs0 = rsMax(0, startx + 0); 90 int xs1 = rsMax(0, startx + 1); 91 int xs2 = rsMin(maxx, startx + 2); 92 int xs3 = rsMin(maxx, startx + 3); 93 94 float4 p0 = cubicInterpolate(convert_float4(yp0[xs0]), 95 convert_float4(yp0[xs1]), 96 convert_float4(yp0[xs2]), 97 convert_float4(yp0[xs3]), xf); 98 99 float4 p1 = cubicInterpolate(convert_float4(yp1[xs0]), 100 convert_float4(yp1[xs1]), 101 convert_float4(yp1[xs2]), 102 convert_float4(yp1[xs3]), xf); 103 104 float4 p2 = cubicInterpolate(convert_float4(yp2[xs0]), 105 convert_float4(yp2[xs1]), 106 convert_float4(yp2[xs2]), 107 convert_float4(yp2[xs3]), xf); 108 109 float4 p3 = cubicInterpolate(convert_float4(yp3[xs0]), 110 convert_float4(yp3[xs1]), 111 convert_float4(yp3[xs2]), 112 convert_float4(yp3[xs3]), xf); 113 114 float4 p = cubicInterpolate(p0, p1, p2, p3, yf); 115 p = clamp(p + 0.5f, 0.f, 255.f); 116 return convert_uchar4(p); 117} 118 119static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3, 120 float xf, float yf, int width) { 121 int startx = (int) floor(xf - 1); 122 xf = xf - floor(xf); 123 int maxx = width - 1; 124 int xs0 = rsMax(0, startx + 0); 125 int xs1 = rsMax(0, startx + 1); 126 int xs2 = rsMin(maxx, startx + 2); 127 int xs3 = rsMin(maxx, startx + 3); 128 129 float2 p0 = cubicInterpolate(convert_float2(yp0[xs0]), 130 convert_float2(yp0[xs1]), 131 convert_float2(yp0[xs2]), 132 convert_float2(yp0[xs3]), xf); 133 134 float2 p1 = cubicInterpolate(convert_float2(yp1[xs0]), 135 convert_float2(yp1[xs1]), 136 convert_float2(yp1[xs2]), 137 convert_float2(yp1[xs3]), xf); 138 139 float2 p2 = cubicInterpolate(convert_float2(yp2[xs0]), 140 convert_float2(yp2[xs1]), 141 convert_float2(yp2[xs2]), 142 convert_float2(yp2[xs3]), xf); 143 144 float2 p3 = cubicInterpolate(convert_float2(yp3[xs0]), 145 convert_float2(yp3[xs1]), 146 convert_float2(yp3[xs2]), 147 convert_float2(yp3[xs3]), xf); 148 149 float2 p = cubicInterpolate(p0, p1, p2, p3, yf); 150 p = clamp(p + 0.5f, 0.f, 255.f); 151 return convert_uchar2(p); 152} 153 154static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3, 155 float xf, float yf, int width) { 156 int startx = (int) floor(xf - 1); 157 xf = xf - floor(xf); 158 int maxx = width - 1; 159 int xs0 = rsMax(0, startx + 0); 160 int xs1 = rsMax(0, startx + 1); 161 int xs2 = rsMin(maxx, startx + 2); 162 int xs3 = rsMin(maxx, startx + 3); 163 164 float p0 = cubicInterpolate((float)yp0[xs0], (float)yp0[xs1], 165 (float)yp0[xs2], (float)yp0[xs3], xf); 166 float p1 = cubicInterpolate((float)yp1[xs0], (float)yp1[xs1], 167 (float)yp1[xs2], (float)yp1[xs3], xf); 168 float p2 = cubicInterpolate((float)yp2[xs0], (float)yp2[xs1], 169 (float)yp2[xs2], (float)yp2[xs3], xf); 170 float p3 = cubicInterpolate((float)yp3[xs0], (float)yp3[xs1], 171 (float)yp3[xs2], (float)yp3[xs3], xf); 172 173 float p = cubicInterpolate(p0, p1, p2, p3, yf); 174 p = clamp(p + 0.5f, 0.f, 255.f); 175 return (uchar)p; 176} 177 178void RsdCpuScriptIntrinsicResize::kernelU4(const RsForEachStubParamStruct *p, 179 uint32_t xstart, uint32_t xend, 180 uint32_t instep, uint32_t outstep) { 181 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr; 182 183 if (!cp->mAlloc.get()) { 184 ALOGE("Resize executed without input, skipping"); 185 return; 186 } 187 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 188 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 189 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 190 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 191 192 float yf = (p->y + 0.5f) * cp->scaleY - 0.5f; 193 int starty = (int) floor(yf - 1); 194 yf = yf - floor(yf); 195 int maxy = srcHeight - 1; 196 int ys0 = rsMax(0, starty + 0); 197 int ys1 = rsMax(0, starty + 1); 198 int ys2 = rsMin(maxy, starty + 2); 199 int ys3 = rsMin(maxy, starty + 3); 200 201 const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0); 202 const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1); 203 const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2); 204 const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3); 205 206 uchar4 *out = ((uchar4 *)p->out) + xstart; 207 uint32_t x1 = xstart; 208 uint32_t x2 = xend; 209 210 while(x1 < x2) { 211 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 212 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 213 out++; 214 x1++; 215 } 216} 217 218void RsdCpuScriptIntrinsicResize::kernelU2(const RsForEachStubParamStruct *p, 219 uint32_t xstart, uint32_t xend, 220 uint32_t instep, uint32_t outstep) { 221 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr; 222 223 if (!cp->mAlloc.get()) { 224 ALOGE("Resize executed without input, skipping"); 225 return; 226 } 227 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 228 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 229 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 230 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 231 232 float yf = (p->y + 0.5f) * cp->scaleY - 0.5f; 233 int starty = (int) floor(yf - 1); 234 yf = yf - floor(yf); 235 int maxy = srcHeight - 1; 236 int ys0 = rsMax(0, starty + 0); 237 int ys1 = rsMax(0, starty + 1); 238 int ys2 = rsMin(maxy, starty + 2); 239 int ys3 = rsMin(maxy, starty + 3); 240 241 const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0); 242 const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1); 243 const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2); 244 const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3); 245 246 uchar2 *out = ((uchar2 *)p->out) + xstart; 247 uint32_t x1 = xstart; 248 uint32_t x2 = xend; 249 250 while(x1 < x2) { 251 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 252 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 253 out++; 254 x1++; 255 } 256} 257 258void RsdCpuScriptIntrinsicResize::kernelU1(const RsForEachStubParamStruct *p, 259 uint32_t xstart, uint32_t xend, 260 uint32_t instep, uint32_t outstep) { 261 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr; 262 263 if (!cp->mAlloc.get()) { 264 ALOGE("Resize executed without input, skipping"); 265 return; 266 } 267 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 268 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 269 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 270 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 271 272 float yf = (p->y + 0.5f) * cp->scaleY - 0.5f; 273 int starty = (int) floor(yf - 1); 274 yf = yf - floor(yf); 275 int maxy = srcHeight - 1; 276 int ys0 = rsMax(0, starty + 0); 277 int ys1 = rsMax(0, starty + 1); 278 int ys2 = rsMin(maxy, starty + 2); 279 int ys3 = rsMin(maxy, starty + 3); 280 281 const uchar *yp0 = pin + stride * ys0; 282 const uchar *yp1 = pin + stride * ys1; 283 const uchar *yp2 = pin + stride * ys2; 284 const uchar *yp3 = pin + stride * ys3; 285 286 uchar *out = ((uchar *)p->out) + xstart; 287 uint32_t x1 = xstart; 288 uint32_t x2 = xend; 289 290 while(x1 < x2) { 291 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 292 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 293 out++; 294 x1++; 295 } 296} 297 298RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize ( 299 RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) 300 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) { 301 302} 303 304RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() { 305} 306 307void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot, const Allocation * ain, 308 Allocation * aout, const void * usr, 309 uint32_t usrLen, const RsScriptCall *sc) 310{ 311 if (!mAlloc.get()) { 312 ALOGE("Resize executed without input, skipping"); 313 return; 314 } 315 const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY; 316 const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX; 317 const size_t stride = mAlloc->mHal.drvState.lod[0].stride; 318 319 switch(mAlloc->getType()->getElement()->getVectorSize()) { 320 case 1: 321 mRootPtr = &kernelU1; 322 break; 323 case 2: 324 mRootPtr = &kernelU2; 325 break; 326 case 3: 327 case 4: 328 mRootPtr = &kernelU4; 329 break; 330 } 331 332 scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX; 333 scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY; 334 335} 336 337void RsdCpuScriptIntrinsicResize::populateScript(Script *s) { 338 s->mHal.info.exportedVariableCount = 1; 339} 340 341void RsdCpuScriptIntrinsicResize::invokeFreeChildren() { 342 mAlloc.clear(); 343} 344 345 346RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { 347 348 return new RsdCpuScriptIntrinsicResize(ctx, s, e); 349} 350 351 352