rsCpuIntrinsicResize.cpp revision 9ed79105cc6a8dbfaf959875249f36022cc2c798
1/* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18#include "rsCpuIntrinsic.h" 19#include "rsCpuIntrinsicInlines.h" 20 21using namespace android; 22using namespace android::renderscript; 23 24namespace android { 25namespace renderscript { 26 27 28class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic { 29public: 30 virtual void populateScript(Script *); 31 virtual void invokeFreeChildren(); 32 33 virtual void setGlobalObj(uint32_t slot, ObjectBase *data); 34 35 virtual ~RsdCpuScriptIntrinsicResize(); 36 RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *); 37 38 virtual void preLaunch(uint32_t slot, const Allocation ** ains, 39 uint32_t inLen, Allocation * aout, const void * usr, 40 uint32_t usrLen, const RsScriptCall *sc); 41 42 float scaleX; 43 float scaleY; 44 45protected: 46 ObjectBaseRef<const Allocation> mAlloc; 47 ObjectBaseRef<const Element> mElement; 48 49 static void kernelU1(const RsExpandKernelParams *p, 50 uint32_t xstart, uint32_t xend, 51 uint32_t outstep); 52 static void kernelU2(const RsExpandKernelParams *p, 53 uint32_t xstart, uint32_t xend, 54 uint32_t outstep); 55 static void kernelU4(const RsExpandKernelParams *p, 56 uint32_t xstart, uint32_t xend, 57 uint32_t outstep); 58}; 59 60} 61} 62 63 64void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) { 65 rsAssert(slot == 0); 66 mAlloc.set(static_cast<Allocation *>(data)); 67} 68 69 70extern "C" void rsdIntrinsicConvolve3x3_K(void *dst, const void *y0, const void *y1, 71 const void *y2, const short *coef, uint32_t count); 72 73static float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) { 74 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 75 + x * (3.f * (p1 - p2) + p3 - p0))); 76} 77 78static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) { 79 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 80 + x * (3.f * (p1 - p2) + p3 - p0))); 81} 82 83static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) { 84 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 85 + x * (3.f * (p1 - p2) + p3 - p0))); 86} 87 88static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3, 89 float xf, float yf, int width) { 90 int startx = (int) floor(xf - 2); 91 xf = xf - floor(xf); 92 int maxx = width - 1; 93 int xs0 = rsMax(0, startx + 0); 94 int xs1 = rsMax(0, startx + 1); 95 int xs2 = rsMin(maxx, startx + 2); 96 int xs3 = rsMin(maxx, startx + 3); 97 98 float4 p0 = cubicInterpolate(convert_float4(yp0[xs0]), 99 convert_float4(yp0[xs1]), 100 convert_float4(yp0[xs2]), 101 convert_float4(yp0[xs3]), xf); 102 103 float4 p1 = cubicInterpolate(convert_float4(yp1[xs0]), 104 convert_float4(yp1[xs1]), 105 convert_float4(yp1[xs2]), 106 convert_float4(yp1[xs3]), xf); 107 108 float4 p2 = cubicInterpolate(convert_float4(yp2[xs0]), 109 convert_float4(yp2[xs1]), 110 convert_float4(yp2[xs2]), 111 convert_float4(yp2[xs3]), xf); 112 113 float4 p3 = cubicInterpolate(convert_float4(yp3[xs0]), 114 convert_float4(yp3[xs1]), 115 convert_float4(yp3[xs2]), 116 convert_float4(yp3[xs3]), xf); 117 118 float4 p = cubicInterpolate(p0, p1, p2, p3, yf); 119 p = clamp(p, 0.f, 255.f); 120 return convert_uchar4(p); 121} 122 123static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3, 124 float xf, float yf, int width) { 125 int startx = (int) floor(xf - 2); 126 xf = xf - floor(xf); 127 int maxx = width - 1; 128 int xs0 = rsMax(0, startx + 0); 129 int xs1 = rsMax(0, startx + 1); 130 int xs2 = rsMin(maxx, startx + 2); 131 int xs3 = rsMin(maxx, startx + 3); 132 133 float2 p0 = cubicInterpolate(convert_float2(yp0[xs0]), 134 convert_float2(yp0[xs1]), 135 convert_float2(yp0[xs2]), 136 convert_float2(yp0[xs3]), xf); 137 138 float2 p1 = cubicInterpolate(convert_float2(yp1[xs0]), 139 convert_float2(yp1[xs1]), 140 convert_float2(yp1[xs2]), 141 convert_float2(yp1[xs3]), xf); 142 143 float2 p2 = cubicInterpolate(convert_float2(yp2[xs0]), 144 convert_float2(yp2[xs1]), 145 convert_float2(yp2[xs2]), 146 convert_float2(yp2[xs3]), xf); 147 148 float2 p3 = cubicInterpolate(convert_float2(yp3[xs0]), 149 convert_float2(yp3[xs1]), 150 convert_float2(yp3[xs2]), 151 convert_float2(yp3[xs3]), xf); 152 153 float2 p = cubicInterpolate(p0, p1, p2, p3, yf); 154 p = clamp(p, 0.f, 255.f); 155 return convert_uchar2(p); 156} 157 158static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3, 159 float xf, float yf, int width) { 160 int startx = (int) floor(xf - 2); 161 xf = xf - floor(xf); 162 int maxx = width - 1; 163 int xs0 = rsMax(0, startx + 0); 164 int xs1 = rsMax(0, startx + 1); 165 int xs2 = rsMin(maxx, startx + 2); 166 int xs3 = rsMin(maxx, startx + 3); 167 168 float p0 = cubicInterpolate((float)yp0[xs0], (float)yp0[xs1], 169 (float)yp0[xs2], (float)yp0[xs3], xf); 170 float p1 = cubicInterpolate((float)yp1[xs0], (float)yp1[xs1], 171 (float)yp1[xs2], (float)yp1[xs3], xf); 172 float p2 = cubicInterpolate((float)yp2[xs0], (float)yp2[xs1], 173 (float)yp2[xs2], (float)yp2[xs3], xf); 174 float p3 = cubicInterpolate((float)yp3[xs0], (float)yp3[xs1], 175 (float)yp3[xs2], (float)yp3[xs3], xf); 176 177 float p = cubicInterpolate(p0, p1, p2, p3, yf); 178 p = clamp(p, 0.f, 255.f); 179 return (uchar)p; 180} 181 182void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelParams *p, 183 uint32_t xstart, uint32_t xend, 184 uint32_t outstep) { 185 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr; 186 187 if (!cp->mAlloc.get()) { 188 ALOGE("Resize executed without input, skipping"); 189 return; 190 } 191 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 192 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 193 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 194 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 195 196 float yf = p->y * cp->scaleY; 197 int starty = (int) floor(yf - 2); 198 yf = yf - floor(yf); 199 int maxy = srcHeight - 1; 200 int ys0 = rsMax(0, starty + 0); 201 int ys1 = rsMax(0, starty + 1); 202 int ys2 = rsMin(maxy, starty + 2); 203 int ys3 = rsMin(maxy, starty + 3); 204 205 const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0); 206 const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1); 207 const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2); 208 const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3); 209 210 uchar4 *out = ((uchar4 *)p->out) + xstart; 211 uint32_t x1 = xstart; 212 uint32_t x2 = xend; 213 214 while(x1 < x2) { 215 float xf = x1 * cp->scaleX; 216 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 217 out++; 218 x1++; 219 } 220} 221 222void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelParams *p, 223 uint32_t xstart, uint32_t xend, 224 uint32_t outstep) { 225 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr; 226 227 if (!cp->mAlloc.get()) { 228 ALOGE("Resize executed without input, skipping"); 229 return; 230 } 231 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 232 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 233 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 234 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 235 236 float yf = p->y * cp->scaleY; 237 int starty = (int) floor(yf - 2); 238 yf = yf - floor(yf); 239 int maxy = srcHeight - 1; 240 int ys0 = rsMax(0, starty + 0); 241 int ys1 = rsMax(0, starty + 1); 242 int ys2 = rsMin(maxy, starty + 2); 243 int ys3 = rsMin(maxy, starty + 3); 244 245 const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0); 246 const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1); 247 const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2); 248 const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3); 249 250 uchar2 *out = ((uchar2 *)p->out) + xstart; 251 uint32_t x1 = xstart; 252 uint32_t x2 = xend; 253 254 while(x1 < x2) { 255 float xf = x1 * cp->scaleX; 256 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 257 out++; 258 x1++; 259 } 260} 261 262void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelParams *p, 263 uint32_t xstart, uint32_t xend, 264 uint32_t outstep) { 265 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr; 266 267 if (!cp->mAlloc.get()) { 268 ALOGE("Resize executed without input, skipping"); 269 return; 270 } 271 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 272 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 273 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 274 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 275 276 float yf = p->y * cp->scaleY; 277 int starty = (int) floor(yf - 2); 278 yf = yf - floor(yf); 279 int maxy = srcHeight - 1; 280 int ys0 = rsMax(0, starty + 0); 281 int ys1 = rsMax(0, starty + 1); 282 int ys2 = rsMin(maxy, starty + 2); 283 int ys3 = rsMin(maxy, starty + 3); 284 285 const uchar *yp0 = pin + stride * ys0; 286 const uchar *yp1 = pin + stride * ys1; 287 const uchar *yp2 = pin + stride * ys2; 288 const uchar *yp3 = pin + stride * ys3; 289 290 uchar *out = ((uchar *)p->out) + xstart; 291 uint32_t x1 = xstart; 292 uint32_t x2 = xend; 293 294 while(x1 < x2) { 295 float xf = x1 * cp->scaleX; 296 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 297 out++; 298 x1++; 299 } 300} 301 302RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize ( 303 RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) 304 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) { 305 306} 307 308RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() { 309} 310 311void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot, 312 const Allocation ** ains, 313 uint32_t inLen, Allocation * aout, 314 const void * usr, uint32_t usrLen, 315 const RsScriptCall *sc) 316{ 317 if (!mAlloc.get()) { 318 ALOGE("Resize executed without input, skipping"); 319 return; 320 } 321 const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY; 322 const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX; 323 const size_t stride = mAlloc->mHal.drvState.lod[0].stride; 324 325 switch(mAlloc->getType()->getElement()->getVectorSize()) { 326 case 1: 327 mRootPtr = &kernelU1; 328 break; 329 case 2: 330 mRootPtr = &kernelU2; 331 break; 332 case 3: 333 case 4: 334 mRootPtr = &kernelU4; 335 break; 336 } 337 338 scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX; 339 scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY; 340 341} 342 343void RsdCpuScriptIntrinsicResize::populateScript(Script *s) { 344 s->mHal.info.exportedVariableCount = 1; 345} 346 347void RsdCpuScriptIntrinsicResize::invokeFreeChildren() { 348 mAlloc.clear(); 349} 350 351 352RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { 353 354 return new RsdCpuScriptIntrinsicResize(ctx, s, e); 355} 356