/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
160d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
170d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
#include <cmath>

#include "rsCpuIntrinsic.h"
#include "rsCpuIntrinsicInlines.h"

using namespace android;
using namespace android::renderscript;
230d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
240d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsnamespace android {
250d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsnamespace renderscript {
260d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
270d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
280d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsclass RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic {
290d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samspublic:
30c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    void populateScript(Script *) override;
31c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    void invokeFreeChildren() override;
320d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
33c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    void setGlobalObj(uint32_t slot, ObjectBase *data) override;
340d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
35c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    ~RsdCpuScriptIntrinsicResize() override;
360d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
370d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
38c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    void preLaunch(uint32_t slot, const Allocation ** ains,
39c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines                   uint32_t inLen, Allocation * aout, const void * usr,
40c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines                   uint32_t usrLen, const RsScriptCall *sc) override;
410d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
420d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float scaleX;
430d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float scaleY;
440d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
450d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsprotected:
460d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    ObjectBaseRef<const Allocation> mAlloc;
470d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    ObjectBaseRef<const Element> mElement;
480d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
49b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelU1(const RsExpandKernelDriverInfo *info,
500d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                         uint32_t xstart, uint32_t xend,
519ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                         uint32_t outstep);
52b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelU2(const RsExpandKernelDriverInfo *info,
530d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                         uint32_t xstart, uint32_t xend,
549ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                         uint32_t outstep);
55b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelU4(const RsExpandKernelDriverInfo *info,
560d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                         uint32_t xstart, uint32_t xend,
579ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                         uint32_t outstep);
58b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelF1(const RsExpandKernelDriverInfo *info,
59d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         uint32_t xstart, uint32_t xend,
60d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         uint32_t outstep);
61b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelF2(const RsExpandKernelDriverInfo *info,
62d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         uint32_t xstart, uint32_t xend,
63d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         uint32_t outstep);
64b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelF4(const RsExpandKernelDriverInfo *info,
65d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         uint32_t xstart, uint32_t xend,
66d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         uint32_t outstep);
670d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams};
680d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
690d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
700d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
710d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
720d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
730d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsvoid RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) {
740d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    rsAssert(slot == 0);
750d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    mAlloc.set(static_cast<Allocation *>(data));
760d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
770d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
780d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsstatic float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) {
790d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
800d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams            + x * (3.f * (p1 - p2) + p3 - p0)));
810d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
820d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
830d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsstatic float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) {
840d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
850d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams            + x * (3.f * (p1 - p2) + p3 - p0)));
860d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
870d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
// Cubic interpolation between p1 and p2 using the neighbouring samples p0 and
// p3. `x` is the position between p1 (x == 0) and p2 (x == 1); at those two
// positions the polynomial evaluates to p1 and p2 respectively.
// Scalar overload.
static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) {
    // Terms are built innermost-first, matching the nested (Horner) form.
    const float cubic = 3.f * (p1 - p2) + p3 - p0;
    const float quadratic = 2.f * p0 - 5.f * p1 + 4.f * p2 - p3 + x * cubic;
    const float linear = p2 - p0 + x * quadratic;
    return p1 + 0.5f * x * linear;
}
920d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
930d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsstatic uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
940d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                         float xf, float yf, int width) {
95a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang    int startx = (int) floor(xf - 1);
960d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    xf = xf - floor(xf);
970d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxx = width - 1;
980d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs0 = rsMax(0, startx + 0);
990d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs1 = rsMax(0, startx + 1);
1000d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs2 = rsMin(maxx, startx + 2);
1010d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs3 = rsMin(maxx, startx + 3);
1020d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1030d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float4 p0  = cubicInterpolate(convert_float4(yp0[xs0]),
1040d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp0[xs1]),
1050d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp0[xs2]),
1060d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp0[xs3]), xf);
1070d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1080d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float4 p1  = cubicInterpolate(convert_float4(yp1[xs0]),
1090d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp1[xs1]),
1100d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp1[xs2]),
1110d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp1[xs3]), xf);
1120d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1130d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float4 p2  = cubicInterpolate(convert_float4(yp2[xs0]),
1140d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp2[xs1]),
1150d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp2[xs2]),
1160d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp2[xs3]), xf);
1170d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1180d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float4 p3  = cubicInterpolate(convert_float4(yp3[xs0]),
1190d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp3[xs1]),
1200d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp3[xs2]),
1210d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp3[xs3]), xf);
1220d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1230d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float4 p  = cubicInterpolate(p0, p1, p2, p3, yf);
124ef05d4666eb87a924c8883e193fd505245101414Miao Wang    p = clamp(p + 0.5f, 0.f, 255.f);
1250d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return convert_uchar4(p);
1260d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
1270d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1280d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsstatic uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3,
1290d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                         float xf, float yf, int width) {
130a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang    int startx = (int) floor(xf - 1);
1310d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    xf = xf - floor(xf);
1320d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxx = width - 1;
1330d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs0 = rsMax(0, startx + 0);
1340d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs1 = rsMax(0, startx + 1);
1350d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs2 = rsMin(maxx, startx + 2);
1360d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs3 = rsMin(maxx, startx + 3);
1370d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1380d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float2 p0  = cubicInterpolate(convert_float2(yp0[xs0]),
1390d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp0[xs1]),
1400d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp0[xs2]),
1410d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp0[xs3]), xf);
1420d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1430d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float2 p1  = cubicInterpolate(convert_float2(yp1[xs0]),
1440d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp1[xs1]),
1450d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp1[xs2]),
1460d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp1[xs3]), xf);
1470d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1480d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float2 p2  = cubicInterpolate(convert_float2(yp2[xs0]),
1490d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp2[xs1]),
1500d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp2[xs2]),
1510d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp2[xs3]), xf);
1520d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1530d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float2 p3  = cubicInterpolate(convert_float2(yp3[xs0]),
1540d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp3[xs1]),
1550d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp3[xs2]),
1560d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp3[xs3]), xf);
1570d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1580d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float2 p  = cubicInterpolate(p0, p1, p2, p3, yf);
159ef05d4666eb87a924c8883e193fd505245101414Miao Wang    p = clamp(p + 0.5f, 0.f, 255.f);
1600d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return convert_uchar2(p);
1610d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
1620d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1630d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsstatic uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3,
1640d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                        float xf, float yf, int width) {
165a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang    int startx = (int) floor(xf - 1);
1660d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    xf = xf - floor(xf);
1670d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxx = width - 1;
1680d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs0 = rsMax(0, startx + 0);
1690d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs1 = rsMax(0, startx + 1);
1700d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs2 = rsMin(maxx, startx + 2);
1710d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs3 = rsMin(maxx, startx + 3);
1720d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1730d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float p0  = cubicInterpolate((float)yp0[xs0], (float)yp0[xs1],
1740d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                 (float)yp0[xs2], (float)yp0[xs3], xf);
1750d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float p1  = cubicInterpolate((float)yp1[xs0], (float)yp1[xs1],
1760d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                 (float)yp1[xs2], (float)yp1[xs3], xf);
1770d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float p2  = cubicInterpolate((float)yp2[xs0], (float)yp2[xs1],
1780d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                 (float)yp2[xs2], (float)yp2[xs3], xf);
1790d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float p3  = cubicInterpolate((float)yp3[xs0], (float)yp3[xs1],
1800d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                 (float)yp3[xs2], (float)yp3[xs3], xf);
1810d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1820d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float p  = cubicInterpolate(p0, p1, p2, p3, yf);
183ef05d4666eb87a924c8883e193fd505245101414Miao Wang    p = clamp(p + 0.5f, 0.f, 255.f);
1840d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return (uchar)p;
1850d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
1860d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1873a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosieextern "C" uint64_t rsdIntrinsicResize_oscctl_K(uint32_t xinc);
1883a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
1893a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosieextern "C" void rsdIntrinsicResizeB4_K(
1903a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar4 *dst,
1913a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t count,
1923a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint32_t xf,
1933a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint32_t xinc,
1943a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar4 const *srcn,
1953a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar4 const *src0,
1963a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar4 const *src1,
1973a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar4 const *src2,
1983a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t xclip,
1993a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t avail,
2003a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint64_t osc_ctl,
2013a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            int32_t const *yr);
2023a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
2033a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosieextern "C" void rsdIntrinsicResizeB2_K(
2043a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar2 *dst,
2053a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t count,
2063a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint32_t xf,
2073a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint32_t xinc,
2083a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar2 const *srcn,
2093a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar2 const *src0,
2103a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar2 const *src1,
2113a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar2 const *src2,
2123a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t xclip,
2133a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t avail,
2143a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint64_t osc_ctl,
2153a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            int32_t const *yr);
2163a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
2173a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosieextern "C" void rsdIntrinsicResizeB1_K(
2183a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar *dst,
2193a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t count,
2203a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint32_t xf,
2213a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint32_t xinc,
2223a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar const *srcn,
2233a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar const *src0,
2243a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar const *src1,
2253a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar const *src2,
2263a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t xclip,
2273a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t avail,
2283a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint64_t osc_ctl,
2293a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            int32_t const *yr);
2303a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
#if defined(ARCH_ARM_USE_INTRINSICS)
// Fills yr[0..3] with the vertical cubic filter weights for the fractional
// position `yf`, as 16.16 fixed-point integers (1.0 == 0x10000).
//
// The weights are those of cubicInterpolate expanded per tap. yr[0] and yr[3]
// are stored with the opposite sign of the corresponding cubicInterpolate
// weights (presumably the kernel subtracts the outer taps -- confirm against
// the assembly).
static void mkYCoeff(int32_t *yr, float yf) {
    // yf, yf^2 and yf^3 in 16.16 fixed point.
    int32_t yf1 = rint(yf * 0x10000);
    int32_t yf2 = rint(yf * yf * 0x10000);
    int32_t yf3 = rint(yf * yf * yf * 0x10000);

    // Unary minus binds tighter than >>, so each negation happens before the
    // halving shift.
    yr[0] = -(2 * yf2 - yf3 - yf1) >> 1;
    yr[1] = (3 * yf3 - 5 * yf2 + 0x20000) >> 1;
    yr[2] = (-3 * yf3 + 4 * yf2 + yf1) >> 1;
    yr[3] = -(yf3 - yf2) >> 1;
}
#endif
243d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
244d75cc84d23f372e55f7173c727129cd5de00748bMiao Wangstatic float4 OneBiCubic(const float4 *yp0, const float4 *yp1, const float4 *yp2, const float4 *yp3,
245d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         float xf, float yf, int width) {
246d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int startx = (int) floor(xf - 1);
247d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    xf = xf - floor(xf);
248d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int maxx = width - 1;
249d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs0 = rsMax(0, startx + 0);
250d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs1 = rsMax(0, startx + 1);
251d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs2 = rsMin(maxx, startx + 2);
252d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs3 = rsMin(maxx, startx + 3);
253d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
254d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float4 p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
255d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp0[xs2], yp0[xs3], xf);
256d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float4 p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
257d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp1[xs2], yp1[xs3], xf);
258d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float4 p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
259d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp2[xs2], yp2[xs3], xf);
260d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float4 p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
261d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp3[xs2], yp3[xs3], xf);
262d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
263d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float4 p  = cubicInterpolate(p0, p1, p2, p3, yf);
264d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    return p;
265d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang}
266d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
267d75cc84d23f372e55f7173c727129cd5de00748bMiao Wangstatic float2 OneBiCubic(const float2 *yp0, const float2 *yp1, const float2 *yp2, const float2 *yp3,
268d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         float xf, float yf, int width) {
269d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int startx = (int) floor(xf - 1);
270d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    xf = xf - floor(xf);
271d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int maxx = width - 1;
272d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs0 = rsMax(0, startx + 0);
273d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs1 = rsMax(0, startx + 1);
274d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs2 = rsMin(maxx, startx + 2);
275d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs3 = rsMin(maxx, startx + 3);
276d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
277d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float2 p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
278d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp0[xs2], yp0[xs3], xf);
279d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float2 p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
280d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp1[xs2], yp1[xs3], xf);
281d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float2 p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
282d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp2[xs2], yp2[xs3], xf);
283d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float2 p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
284d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp3[xs2], yp3[xs3], xf);
285d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
286d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float2 p  = cubicInterpolate(p0, p1, p2, p3, yf);
287d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    return p;
288d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang}
289d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
290d75cc84d23f372e55f7173c727129cd5de00748bMiao Wangstatic float OneBiCubic(const float *yp0, const float *yp1, const float *yp2, const float *yp3,
291d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                        float xf, float yf, int width) {
292d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int startx = (int) floor(xf - 1);
293d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    xf = xf - floor(xf);
294d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int maxx = width - 1;
295d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs0 = rsMax(0, startx + 0);
296d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs1 = rsMax(0, startx + 1);
297d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs2 = rsMin(maxx, startx + 2);
298d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs3 = rsMin(maxx, startx + 3);
299d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
300d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
301d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                 yp0[xs2], yp0[xs3], xf);
302d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
303d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                 yp1[xs2], yp1[xs3], xf);
304d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
305d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                 yp2[xs2], yp2[xs3], xf);
306d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
307d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                 yp3[xs2], yp3[xs3], xf);
308d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
309d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float p  = cubicInterpolate(p0, p1, p2, p3, yf);
310d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    return p;
311d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang}
312d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
313b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info,
3140d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                                uint32_t xstart, uint32_t xend,
3159ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
316b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
3170d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3180d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    if (!cp->mAlloc.get()) {
3190d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        ALOGE("Resize executed without input, skipping");
3200d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        return;
3210d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
3220d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
3230d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
3240d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
3250d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
3260d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
327b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
328a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang    int starty = (int) floor(yf - 1);
3290d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    yf = yf - floor(yf);
3300d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxy = srcHeight - 1;
3310d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys0 = rsMax(0, starty + 0);
3320d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys1 = rsMax(0, starty + 1);
3330d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys2 = rsMin(maxy, starty + 2);
3340d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys3 = rsMin(maxy, starty + 3);
3350d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3360d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0);
3370d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1);
3380d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
3390d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
3400d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
341b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    uchar4 *out = ((uchar4 *)info->outPtr[0]) + xstart;
3420d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x1 = xstart;
3430d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x2 = xend;
3440d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3453a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie#if defined(ARCH_ARM_USE_INTRINSICS)
3463a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie    if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
3473a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
3483a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        long xf16 = rint(xf * 0x10000);
3493a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        uint32_t xinc16 = rint(cp->scaleX * 0x10000);
3503a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
3513a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int xoff = (xf16 >> 16) - 1;
3523a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int xclip = rsMax(0, xoff) - xoff;
3533a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int len = x2 - x1;
3543a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
3553a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int32_t yr[4];
3563a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
3573a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        mkYCoeff(yr, yf);
3583a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
3593a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        xoff += xclip;
3603a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
3613a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        rsdIntrinsicResizeB4_K(
3623a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                out, len,
3633a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                xf16 & 0xffff, xinc16,
3643a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
3653a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                xclip, srcWidth - xoff + xclip,
3663a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                osc_ctl, yr);
3673a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        out += len;
3683a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        x1 += len;
3693a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie    }
3703a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie#endif
3713a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
3720d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    while(x1 < x2) {
373a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
3740d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
3750d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        out++;
3760d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        x1++;
3770d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
3780d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
3790d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
380b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info,
3810d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                                uint32_t xstart, uint32_t xend,
3829ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
383b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
3840d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3850d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    if (!cp->mAlloc.get()) {
3860d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        ALOGE("Resize executed without input, skipping");
3870d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        return;
3880d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
3890d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
3900d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
3910d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
3920d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
3930d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
394b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
395a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang    int starty = (int) floor(yf - 1);
3960d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    yf = yf - floor(yf);
3970d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxy = srcHeight - 1;
3980d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys0 = rsMax(0, starty + 0);
3990d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys1 = rsMax(0, starty + 1);
4000d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys2 = rsMin(maxy, starty + 2);
4010d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys3 = rsMin(maxy, starty + 3);
4020d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
4030d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0);
4040d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1);
4050d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
4060d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
4070d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
408b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    uchar2 *out = ((uchar2 *)info->outPtr[0]) + xstart;
4090d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x1 = xstart;
4100d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x2 = xend;
4110d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
4123a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie#if defined(ARCH_ARM_USE_INTRINSICS)
4133a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie    if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
4143a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
4153a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        long xf16 = rint(xf * 0x10000);
4163a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        uint32_t xinc16 = rint(cp->scaleX * 0x10000);
4173a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4183a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int xoff = (xf16 >> 16) - 1;
4193a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int xclip = rsMax(0, xoff) - xoff;
4203a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int len = x2 - x1;
4213a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4223a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int32_t yr[4];
4233a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
4243a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        mkYCoeff(yr, yf);
4253a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4263a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        xoff += xclip;
4273a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4283a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        rsdIntrinsicResizeB2_K(
4293a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                out, len,
4303a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                xf16 & 0xffff, xinc16,
4313a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
4323a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                xclip, srcWidth - xoff + xclip,
4333a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                osc_ctl, yr);
4343a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        out += len;
4353a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        x1 += len;
4363a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie    }
4373a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie#endif
4383a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4390d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    while(x1 < x2) {
440a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
4410d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
4420d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        out++;
4430d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        x1++;
4440d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
4450d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
4460d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
447b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info,
4480d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                                uint32_t xstart, uint32_t xend,
4499ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
450b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
4510d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
4520d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    if (!cp->mAlloc.get()) {
4530d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        ALOGE("Resize executed without input, skipping");
4540d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        return;
4550d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
4560d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
4570d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
4580d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
4590d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
4600d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
461b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
462a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang    int starty = (int) floor(yf - 1);
4630d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    yf = yf - floor(yf);
4640d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxy = srcHeight - 1;
4650d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys0 = rsMax(0, starty + 0);
4660d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys1 = rsMax(0, starty + 1);
4670d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys2 = rsMin(maxy, starty + 2);
4680d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys3 = rsMin(maxy, starty + 3);
4690d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
4700d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *yp0 = pin + stride * ys0;
4710d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *yp1 = pin + stride * ys1;
4720d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *yp2 = pin + stride * ys2;
4730d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *yp3 = pin + stride * ys3;
4740d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
475b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    uchar *out = ((uchar *)info->outPtr[0]) + xstart;
4760d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x1 = xstart;
4770d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x2 = xend;
4780d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
4793a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie#if defined(ARCH_ARM_USE_INTRINSICS)
4803a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie    if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
4813a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
4823a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        long xf16 = rint(xf * 0x10000);
4833a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        uint32_t xinc16 = rint(cp->scaleX * 0x10000);
4843a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4853a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int xoff = (xf16 >> 16) - 1;
4863a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int xclip = rsMax(0, xoff) - xoff;
4873a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int len = x2 - x1;
4883a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4893a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int32_t yr[4];
4903a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
4913a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        mkYCoeff(yr, yf);
4923a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4933a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        xoff += xclip;
4943a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4953a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        rsdIntrinsicResizeB1_K(
4963a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                out, len,
4973a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                xf16 & 0xffff, xinc16,
4983a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
4993a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                xclip, srcWidth - xoff + xclip,
5003a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                osc_ctl, yr);
5013a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        out += len;
5023a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        x1 += len;
5033a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie    }
5043a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie#endif
5053a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
5060d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    while(x1 < x2) {
507a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
5080d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
5090d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        out++;
5100d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        x1++;
5110d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
5120d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
5130d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
514b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info,
515d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                                uint32_t xstart, uint32_t xend,
516d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                                uint32_t outstep) {
517b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
518d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
519d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    if (!cp->mAlloc.get()) {
520d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        ALOGE("Resize executed without input, skipping");
521d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        return;
522d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    }
523d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
524d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
525d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
526d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
527d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
528b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
529d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int starty = (int) floor(yf - 1);
530d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    yf = yf - floor(yf);
531d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int maxy = srcHeight - 1;
532d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys0 = rsMax(0, starty + 0);
533d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys1 = rsMax(0, starty + 1);
534d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys2 = rsMin(maxy, starty + 2);
535d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys3 = rsMin(maxy, starty + 3);
536d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
537d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float4 *yp0 = (const float4 *)(pin + stride * ys0);
538d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float4 *yp1 = (const float4 *)(pin + stride * ys1);
539d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float4 *yp2 = (const float4 *)(pin + stride * ys2);
540d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float4 *yp3 = (const float4 *)(pin + stride * ys3);
541d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
542b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float4 *out = ((float4 *)info->outPtr[0]) + xstart;
543d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    uint32_t x1 = xstart;
544d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    uint32_t x2 = xend;
545d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
546d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    while(x1 < x2) {
547d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
548d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
549d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        out++;
550d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        x1++;
551d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    }
552d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang}
553d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
554b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info,
555d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                                uint32_t xstart, uint32_t xend,
556d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                                uint32_t outstep) {
557b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
558d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
559d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    if (!cp->mAlloc.get()) {
560d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        ALOGE("Resize executed without input, skipping");
561d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        return;
562d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    }
563d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
564d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
565d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
566d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
567d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
568b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
569d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int starty = (int) floor(yf - 1);
570d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    yf = yf - floor(yf);
571d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int maxy = srcHeight - 1;
572d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys0 = rsMax(0, starty + 0);
573d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys1 = rsMax(0, starty + 1);
574d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys2 = rsMin(maxy, starty + 2);
575d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys3 = rsMin(maxy, starty + 3);
576d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
577d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float2 *yp0 = (const float2 *)(pin + stride * ys0);
578d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float2 *yp1 = (const float2 *)(pin + stride * ys1);
579d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float2 *yp2 = (const float2 *)(pin + stride * ys2);
580d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float2 *yp3 = (const float2 *)(pin + stride * ys3);
581d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
582b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float2 *out = ((float2 *)info->outPtr[0]) + xstart;
583d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    uint32_t x1 = xstart;
584d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    uint32_t x2 = xend;
585d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
586d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    while(x1 < x2) {
587d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
588d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
589d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        out++;
590d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        x1++;
591d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    }
592d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang}
593d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
594b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info,
595d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                                uint32_t xstart, uint32_t xend,
596d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                                uint32_t outstep) {
597b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
598d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
599d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    if (!cp->mAlloc.get()) {
600d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        ALOGE("Resize executed without input, skipping");
601d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        return;
602d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    }
603d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
604d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
605d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
606d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
607d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
608b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
609d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int starty = (int) floor(yf - 1);
610d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    yf = yf - floor(yf);
611d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int maxy = srcHeight - 1;
612d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys0 = rsMax(0, starty + 0);
613d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys1 = rsMax(0, starty + 1);
614d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys2 = rsMin(maxy, starty + 2);
615d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys3 = rsMin(maxy, starty + 3);
616d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
617d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float *yp0 = (const float *)(pin + stride * ys0);
618d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float *yp1 = (const float *)(pin + stride * ys1);
619d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float *yp2 = (const float *)(pin + stride * ys2);
620d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float *yp3 = (const float *)(pin + stride * ys3);
621d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
622b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float *out = ((float *)info->outPtr[0]) + xstart;
623d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    uint32_t x1 = xstart;
624d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    uint32_t x2 = xend;
625d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
626d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    while(x1 < x2) {
627d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
628d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
629d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        out++;
630d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        x1++;
631d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    }
632d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang}
633d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
6340d6043caef208ee6c661eb17bcb376abfe90cd48Jason SamsRsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize (
6350d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams            RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
6360d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) {
6370d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
6380d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
6390d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
6400d6043caef208ee6c661eb17bcb376abfe90cd48Jason SamsRsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
6410d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
6420d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
643f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailesvoid RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot,
644f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                            const Allocation ** ains,
645f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                            uint32_t inLen, Allocation * aout,
646f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                            const void * usr, uint32_t usrLen,
647f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                            const RsScriptCall *sc)
6480d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams{
6490d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    if (!mAlloc.get()) {
6500d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        ALOGE("Resize executed without input, skipping");
6510d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        return;
6520d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
6530d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY;
6540d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX;
6550d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const size_t stride = mAlloc->mHal.drvState.lod[0].stride;
6560d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
657d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    //check the data type to determine F or U.
658d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    if (mAlloc->getType()->getElement()->getType() == RS_TYPE_UNSIGNED_8) {
659d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        switch(mAlloc->getType()->getElement()->getVectorSize()) {
660d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 1:
661d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            mRootPtr = &kernelU1;
662d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            break;
663d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 2:
664d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            mRootPtr = &kernelU2;
665d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            break;
666d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 3:
667d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 4:
668d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            mRootPtr = &kernelU4;
669d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            break;
670d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        }
671d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    } else {
672d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        switch(mAlloc->getType()->getElement()->getVectorSize()) {
673d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 1:
674d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            mRootPtr = &kernelF1;
675d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            break;
676d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 2:
677d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            mRootPtr = &kernelF2;
678d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            break;
679d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 3:
680d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 4:
681d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            mRootPtr = &kernelF4;
682d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            break;
683d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        }
6840d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
6850d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
6860d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX;
6870d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY;
6880d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
6890d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
6900d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
6910d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsvoid RsdCpuScriptIntrinsicResize::populateScript(Script *s) {
6920d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    s->mHal.info.exportedVariableCount = 1;
6930d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
6940d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
6950d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsvoid RsdCpuScriptIntrinsicResize::invokeFreeChildren() {
6960d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    mAlloc.clear();
6970d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
6980d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
6990d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
7000d6043caef208ee6c661eb17bcb376abfe90cd48Jason SamsRsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
7010d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
7020d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return new RsdCpuScriptIntrinsicResize(ctx, s, e);
7030d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
704