// rsCpuIntrinsicResize.cpp revision ef05d4666eb87a924c8883e193fd505245101414
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
160d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
170d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
#include <math.h>

#include "rsCpuIntrinsic.h"
#include "rsCpuIntrinsicInlines.h"

using namespace android;
using namespace android::renderscript;
230d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
240d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsnamespace android {
250d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsnamespace renderscript {
260d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
270d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
280d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsclass RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic {
290d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samspublic:
300d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    virtual void populateScript(Script *);
310d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    virtual void invokeFreeChildren();
320d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
330d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
340d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
350d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    virtual ~RsdCpuScriptIntrinsicResize();
360d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
370d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
38f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes    virtual void preLaunch(uint32_t slot, const Allocation ** ains,
39f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                           uint32_t inLen, Allocation * aout, const void * usr,
400d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                           uint32_t usrLen, const RsScriptCall *sc);
410d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
420d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float scaleX;
430d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float scaleY;
440d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
450d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsprotected:
460d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    ObjectBaseRef<const Allocation> mAlloc;
470d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    ObjectBaseRef<const Element> mElement;
480d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
4980ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes    static void kernelU1(const RsExpandKernelParams *p,
500d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                         uint32_t xstart, uint32_t xend,
519ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                         uint32_t outstep);
5280ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes    static void kernelU2(const RsExpandKernelParams *p,
530d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                         uint32_t xstart, uint32_t xend,
549ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                         uint32_t outstep);
5580ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes    static void kernelU4(const RsExpandKernelParams *p,
560d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                         uint32_t xstart, uint32_t xend,
579ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                         uint32_t outstep);
580d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams};
590d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
600d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
610d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
620d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
630d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
640d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsvoid RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) {
650d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    rsAssert(slot == 0);
660d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    mAlloc.set(static_cast<Allocation *>(data));
670d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
680d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
690d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsstatic float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) {
700d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
710d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams            + x * (3.f * (p1 - p2) + p3 - p0)));
720d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
730d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
740d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsstatic float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) {
750d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
760d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams            + x * (3.f * (p1 - p2) + p3 - p0)));
770d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
780d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
/**
 * Catmull-Rom cubic interpolation through four equally spaced scalar samples.
 *
 * @param p0,p1,p2,p3  Consecutive samples; the result lies between p1 and p2.
 * @param x            Position between p1 (x == 0) and p2 (x == 1).
 * @return The interpolated value; equals p1 at x == 0 and p2 at x == 1.
 */
static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) {
    return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
            + x * (3.f * (p1 - p2) + p3 - p0)));
}
830d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
840d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsstatic uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
850d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                         float xf, float yf, int width) {
860d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int startx = (int) floor(xf - 2);
870d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    xf = xf - floor(xf);
880d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxx = width - 1;
890d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs0 = rsMax(0, startx + 0);
900d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs1 = rsMax(0, startx + 1);
910d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs2 = rsMin(maxx, startx + 2);
920d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs3 = rsMin(maxx, startx + 3);
930d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
940d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float4 p0  = cubicInterpolate(convert_float4(yp0[xs0]),
950d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp0[xs1]),
960d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp0[xs2]),
970d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp0[xs3]), xf);
980d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
990d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float4 p1  = cubicInterpolate(convert_float4(yp1[xs0]),
1000d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp1[xs1]),
1010d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp1[xs2]),
1020d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp1[xs3]), xf);
1030d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1040d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float4 p2  = cubicInterpolate(convert_float4(yp2[xs0]),
1050d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp2[xs1]),
1060d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp2[xs2]),
1070d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp2[xs3]), xf);
1080d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1090d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float4 p3  = cubicInterpolate(convert_float4(yp3[xs0]),
1100d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp3[xs1]),
1110d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp3[xs2]),
1120d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp3[xs3]), xf);
1130d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1140d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float4 p  = cubicInterpolate(p0, p1, p2, p3, yf);
115ef05d4666eb87a924c8883e193fd505245101414Miao Wang    p = clamp(p + 0.5f, 0.f, 255.f);
1160d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return convert_uchar4(p);
1170d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
1180d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1190d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsstatic uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3,
1200d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                         float xf, float yf, int width) {
1210d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int startx = (int) floor(xf - 2);
1220d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    xf = xf - floor(xf);
1230d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxx = width - 1;
1240d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs0 = rsMax(0, startx + 0);
1250d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs1 = rsMax(0, startx + 1);
1260d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs2 = rsMin(maxx, startx + 2);
1270d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs3 = rsMin(maxx, startx + 3);
1280d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1290d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float2 p0  = cubicInterpolate(convert_float2(yp0[xs0]),
1300d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp0[xs1]),
1310d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp0[xs2]),
1320d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp0[xs3]), xf);
1330d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1340d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float2 p1  = cubicInterpolate(convert_float2(yp1[xs0]),
1350d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp1[xs1]),
1360d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp1[xs2]),
1370d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp1[xs3]), xf);
1380d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1390d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float2 p2  = cubicInterpolate(convert_float2(yp2[xs0]),
1400d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp2[xs1]),
1410d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp2[xs2]),
1420d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp2[xs3]), xf);
1430d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1440d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float2 p3  = cubicInterpolate(convert_float2(yp3[xs0]),
1450d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp3[xs1]),
1460d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp3[xs2]),
1470d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp3[xs3]), xf);
1480d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1490d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float2 p  = cubicInterpolate(p0, p1, p2, p3, yf);
150ef05d4666eb87a924c8883e193fd505245101414Miao Wang    p = clamp(p + 0.5f, 0.f, 255.f);
1510d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return convert_uchar2(p);
1520d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
1530d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1540d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsstatic uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3,
1550d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                        float xf, float yf, int width) {
1560d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int startx = (int) floor(xf - 2);
1570d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    xf = xf - floor(xf);
1580d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxx = width - 1;
1590d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs0 = rsMax(0, startx + 0);
1600d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs1 = rsMax(0, startx + 1);
1610d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs2 = rsMin(maxx, startx + 2);
1620d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs3 = rsMin(maxx, startx + 3);
1630d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1640d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float p0  = cubicInterpolate((float)yp0[xs0], (float)yp0[xs1],
1650d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                 (float)yp0[xs2], (float)yp0[xs3], xf);
1660d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float p1  = cubicInterpolate((float)yp1[xs0], (float)yp1[xs1],
1670d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                 (float)yp1[xs2], (float)yp1[xs3], xf);
1680d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float p2  = cubicInterpolate((float)yp2[xs0], (float)yp2[xs1],
1690d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                 (float)yp2[xs2], (float)yp2[xs3], xf);
1700d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float p3  = cubicInterpolate((float)yp3[xs0], (float)yp3[xs1],
1710d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                 (float)yp3[xs2], (float)yp3[xs3], xf);
1720d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1730d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float p  = cubicInterpolate(p0, p1, p2, p3, yf);
174ef05d4666eb87a924c8883e193fd505245101414Miao Wang    p = clamp(p + 0.5f, 0.f, 255.f);
1750d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return (uchar)p;
1760d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
1770d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
17880ef693674f69c0343c41564e30f80e7fb513b60Chris Wailesvoid RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelParams *p,
1790d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                                uint32_t xstart, uint32_t xend,
1809ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
1810d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
1820d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1830d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    if (!cp->mAlloc.get()) {
1840d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        ALOGE("Resize executed without input, skipping");
1850d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        return;
1860d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
1870d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
1880d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
1890d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
1900d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
1910d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1920d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float yf = p->y * cp->scaleY;
1930d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int starty = (int) floor(yf - 2);
1940d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    yf = yf - floor(yf);
1950d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxy = srcHeight - 1;
1960d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys0 = rsMax(0, starty + 0);
1970d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys1 = rsMax(0, starty + 1);
1980d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys2 = rsMin(maxy, starty + 2);
1990d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys3 = rsMin(maxy, starty + 3);
2000d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
2010d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0);
2020d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1);
2030d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
2040d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
2050d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
2060d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uchar4 *out = ((uchar4 *)p->out) + xstart;
2070d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x1 = xstart;
2080d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x2 = xend;
2090d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
2100d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    while(x1 < x2) {
2110d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        float xf = x1 * cp->scaleX;
2120d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
2130d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        out++;
2140d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        x1++;
2150d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
2160d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
2170d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
21880ef693674f69c0343c41564e30f80e7fb513b60Chris Wailesvoid RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelParams *p,
2190d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                                uint32_t xstart, uint32_t xend,
2209ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
2210d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
2220d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
2230d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    if (!cp->mAlloc.get()) {
2240d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        ALOGE("Resize executed without input, skipping");
2250d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        return;
2260d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
2270d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
2280d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
2290d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
2300d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
2310d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
2320d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float yf = p->y * cp->scaleY;
2330d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int starty = (int) floor(yf - 2);
2340d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    yf = yf - floor(yf);
2350d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxy = srcHeight - 1;
2360d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys0 = rsMax(0, starty + 0);
2370d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys1 = rsMax(0, starty + 1);
2380d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys2 = rsMin(maxy, starty + 2);
2390d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys3 = rsMin(maxy, starty + 3);
2400d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
2410d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0);
2420d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1);
2430d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
2440d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
2450d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
2460d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uchar2 *out = ((uchar2 *)p->out) + xstart;
2470d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x1 = xstart;
2480d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x2 = xend;
2490d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
2500d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    while(x1 < x2) {
2510d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        float xf = x1 * cp->scaleX;
2520d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
2530d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        out++;
2540d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        x1++;
2550d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
2560d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
2570d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
25880ef693674f69c0343c41564e30f80e7fb513b60Chris Wailesvoid RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelParams *p,
2590d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                                uint32_t xstart, uint32_t xend,
2609ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
2610d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
2620d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
2630d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    if (!cp->mAlloc.get()) {
2640d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        ALOGE("Resize executed without input, skipping");
2650d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        return;
2660d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
2670d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
2680d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
2690d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
2700d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
2710d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
2720d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float yf = p->y * cp->scaleY;
2730d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int starty = (int) floor(yf - 2);
2740d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    yf = yf - floor(yf);
2750d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxy = srcHeight - 1;
2760d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys0 = rsMax(0, starty + 0);
2770d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys1 = rsMax(0, starty + 1);
2780d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys2 = rsMin(maxy, starty + 2);
2790d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys3 = rsMin(maxy, starty + 3);
2800d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
2810d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *yp0 = pin + stride * ys0;
2820d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *yp1 = pin + stride * ys1;
2830d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *yp2 = pin + stride * ys2;
2840d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *yp3 = pin + stride * ys3;
2850d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
2860d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uchar *out = ((uchar *)p->out) + xstart;
2870d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x1 = xstart;
2880d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x2 = xend;
2890d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
2900d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    while(x1 < x2) {
2910d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        float xf = x1 * cp->scaleX;
2920d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
2930d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        out++;
2940d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        x1++;
2950d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
2960d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
2970d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
2980d6043caef208ee6c661eb17bcb376abfe90cd48Jason SamsRsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize (
2990d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams            RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
3000d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) {
3010d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3020d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
3030d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3040d6043caef208ee6c661eb17bcb376abfe90cd48Jason SamsRsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
3050d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
3060d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
307f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailesvoid RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot,
308f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                            const Allocation ** ains,
309f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                            uint32_t inLen, Allocation * aout,
310f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                            const void * usr, uint32_t usrLen,
311f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                            const RsScriptCall *sc)
3120d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams{
3130d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    if (!mAlloc.get()) {
3140d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        ALOGE("Resize executed without input, skipping");
3150d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        return;
3160d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
3170d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY;
3180d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX;
3190d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const size_t stride = mAlloc->mHal.drvState.lod[0].stride;
3200d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3210d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    switch(mAlloc->getType()->getElement()->getVectorSize()) {
3220d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    case 1:
3230d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        mRootPtr = &kernelU1;
3240d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        break;
3250d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    case 2:
3260d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        mRootPtr = &kernelU2;
3270d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        break;
3280d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    case 3:
3290d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    case 4:
3300d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        mRootPtr = &kernelU4;
3310d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        break;
3320d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
3330d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3340d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX;
3350d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY;
3360d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3370d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
3380d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3390d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsvoid RsdCpuScriptIntrinsicResize::populateScript(Script *s) {
3400d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    s->mHal.info.exportedVariableCount = 1;
3410d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
3420d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3430d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsvoid RsdCpuScriptIntrinsicResize::invokeFreeChildren() {
3440d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    mAlloc.clear();
3450d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
3460d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3470d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3480d6043caef208ee6c661eb17bcb376abfe90cd48Jason SamsRsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
3490d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3500d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return new RsdCpuScriptIntrinsicResize(ctx, s, e);
3510d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
352