/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
160d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
170d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
180d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams#include "rsCpuIntrinsic.h"
190d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams#include "rsCpuIntrinsicInlines.h"
200d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
210d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsnamespace android {
220d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsnamespace renderscript {
230d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
240d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
250d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsclass RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic {
260d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samspublic:
27c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    void populateScript(Script *) override;
28c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    void invokeFreeChildren() override;
290d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
30c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    void setGlobalObj(uint32_t slot, ObjectBase *data) override;
310d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
32c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    ~RsdCpuScriptIntrinsicResize() override;
330d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
340d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
35c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    void preLaunch(uint32_t slot, const Allocation ** ains,
36c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines                   uint32_t inLen, Allocation * aout, const void * usr,
37c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines                   uint32_t usrLen, const RsScriptCall *sc) override;
380d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
390d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float scaleX;
400d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float scaleY;
410d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
420d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsprotected:
430d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    ObjectBaseRef<const Allocation> mAlloc;
440d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    ObjectBaseRef<const Element> mElement;
450d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
46b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelU1(const RsExpandKernelDriverInfo *info,
470d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                         uint32_t xstart, uint32_t xend,
489ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                         uint32_t outstep);
49b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelU2(const RsExpandKernelDriverInfo *info,
500d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                         uint32_t xstart, uint32_t xend,
519ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                         uint32_t outstep);
52b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelU4(const RsExpandKernelDriverInfo *info,
530d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                         uint32_t xstart, uint32_t xend,
549ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                         uint32_t outstep);
55b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelF1(const RsExpandKernelDriverInfo *info,
56d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         uint32_t xstart, uint32_t xend,
57d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         uint32_t outstep);
58b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelF2(const RsExpandKernelDriverInfo *info,
59d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         uint32_t xstart, uint32_t xend,
60d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         uint32_t outstep);
61b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelF4(const RsExpandKernelDriverInfo *info,
62d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         uint32_t xstart, uint32_t xend,
63d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         uint32_t outstep);
640d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams};
650d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
660d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsvoid RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) {
670d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    rsAssert(slot == 0);
680d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    mAlloc.set(static_cast<Allocation *>(data));
690d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
700d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
710d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsstatic float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) {
720d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
730d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams            + x * (3.f * (p1 - p2) + p3 - p0)));
740d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
750d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
760d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsstatic float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) {
770d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
780d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams            + x * (3.f * (p1 - p2) + p3 - p0)));
790d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
800d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
// Catmull-Rom cubic interpolation between p1 and p2 for a single channel.
//
// p0..p3 are four consecutive samples and x is the position between p1 and p2
// (x == 0 yields p1, x == 1 yields p2). The result may overshoot the range of
// the inputs. The polynomial is evaluated in Horner form; the terms are grouped
// exactly as in the single-expression form so the floating-point evaluation
// order is unchanged.
static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) {
    const float cubic = 3.f * (p1 - p2) + p3 - p0;
    const float quadratic = 2.f * p0 - 5.f * p1 + 4.f * p2 - p3;
    return p1 + 0.5f * x * (p2 - p0 + x * (quadratic + x * cubic));
}
850d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
860d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsstatic uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
870d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                         float xf, float yf, int width) {
88a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang    int startx = (int) floor(xf - 1);
890d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    xf = xf - floor(xf);
900d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxx = width - 1;
910d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs0 = rsMax(0, startx + 0);
920d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs1 = rsMax(0, startx + 1);
930d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs2 = rsMin(maxx, startx + 2);
940d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs3 = rsMin(maxx, startx + 3);
950d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
960d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float4 p0  = cubicInterpolate(convert_float4(yp0[xs0]),
970d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp0[xs1]),
980d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp0[xs2]),
990d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp0[xs3]), xf);
1000d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1010d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float4 p1  = cubicInterpolate(convert_float4(yp1[xs0]),
1020d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp1[xs1]),
1030d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp1[xs2]),
1040d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp1[xs3]), xf);
1050d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1060d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float4 p2  = cubicInterpolate(convert_float4(yp2[xs0]),
1070d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp2[xs1]),
1080d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp2[xs2]),
1090d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp2[xs3]), xf);
1100d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1110d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float4 p3  = cubicInterpolate(convert_float4(yp3[xs0]),
1120d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp3[xs1]),
1130d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp3[xs2]),
1140d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float4(yp3[xs3]), xf);
1150d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1160d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float4 p  = cubicInterpolate(p0, p1, p2, p3, yf);
117ef05d4666eb87a924c8883e193fd505245101414Miao Wang    p = clamp(p + 0.5f, 0.f, 255.f);
1180d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return convert_uchar4(p);
1190d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
1200d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1210d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsstatic uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3,
1220d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                         float xf, float yf, int width) {
123a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang    int startx = (int) floor(xf - 1);
1240d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    xf = xf - floor(xf);
1250d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxx = width - 1;
1260d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs0 = rsMax(0, startx + 0);
1270d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs1 = rsMax(0, startx + 1);
1280d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs2 = rsMin(maxx, startx + 2);
1290d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs3 = rsMin(maxx, startx + 3);
1300d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1310d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float2 p0  = cubicInterpolate(convert_float2(yp0[xs0]),
1320d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp0[xs1]),
1330d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp0[xs2]),
1340d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp0[xs3]), xf);
1350d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1360d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float2 p1  = cubicInterpolate(convert_float2(yp1[xs0]),
1370d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp1[xs1]),
1380d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp1[xs2]),
1390d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp1[xs3]), xf);
1400d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1410d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float2 p2  = cubicInterpolate(convert_float2(yp2[xs0]),
1420d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp2[xs1]),
1430d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp2[xs2]),
1440d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp2[xs3]), xf);
1450d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1460d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float2 p3  = cubicInterpolate(convert_float2(yp3[xs0]),
1470d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp3[xs1]),
1480d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp3[xs2]),
1490d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                  convert_float2(yp3[xs3]), xf);
1500d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1510d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float2 p  = cubicInterpolate(p0, p1, p2, p3, yf);
152ef05d4666eb87a924c8883e193fd505245101414Miao Wang    p = clamp(p + 0.5f, 0.f, 255.f);
1530d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return convert_uchar2(p);
1540d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
1550d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1560d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsstatic uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3,
1570d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                        float xf, float yf, int width) {
158a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang    int startx = (int) floor(xf - 1);
1590d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    xf = xf - floor(xf);
1600d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxx = width - 1;
1610d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs0 = rsMax(0, startx + 0);
1620d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs1 = rsMax(0, startx + 1);
1630d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs2 = rsMin(maxx, startx + 2);
1640d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int xs3 = rsMin(maxx, startx + 3);
1650d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1660d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float p0  = cubicInterpolate((float)yp0[xs0], (float)yp0[xs1],
1670d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                 (float)yp0[xs2], (float)yp0[xs3], xf);
1680d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float p1  = cubicInterpolate((float)yp1[xs0], (float)yp1[xs1],
1690d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                 (float)yp1[xs2], (float)yp1[xs3], xf);
1700d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float p2  = cubicInterpolate((float)yp2[xs0], (float)yp2[xs1],
1710d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                 (float)yp2[xs2], (float)yp2[xs3], xf);
1720d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float p3  = cubicInterpolate((float)yp3[xs0], (float)yp3[xs1],
1730d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                 (float)yp3[xs2], (float)yp3[xs3], xf);
1740d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1750d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    float p  = cubicInterpolate(p0, p1, p2, p3, yf);
176ef05d4666eb87a924c8883e193fd505245101414Miao Wang    p = clamp(p + 0.5f, 0.f, 255.f);
1770d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return (uchar)p;
1780d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
1790d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
1803a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosieextern "C" uint64_t rsdIntrinsicResize_oscctl_K(uint32_t xinc);
1813a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
1823a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosieextern "C" void rsdIntrinsicResizeB4_K(
1833a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar4 *dst,
1843a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t count,
1853a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint32_t xf,
1863a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint32_t xinc,
1873a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar4 const *srcn,
1883a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar4 const *src0,
1893a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar4 const *src1,
1903a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar4 const *src2,
1913a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t xclip,
1923a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t avail,
1933a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint64_t osc_ctl,
1943a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            int32_t const *yr);
1953a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
1963a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosieextern "C" void rsdIntrinsicResizeB2_K(
1973a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar2 *dst,
1983a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t count,
1993a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint32_t xf,
2003a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint32_t xinc,
2013a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar2 const *srcn,
2023a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar2 const *src0,
2033a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar2 const *src1,
2043a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar2 const *src2,
2053a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t xclip,
2063a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t avail,
2073a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint64_t osc_ctl,
2083a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            int32_t const *yr);
2093a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
2103a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosieextern "C" void rsdIntrinsicResizeB1_K(
2113a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar *dst,
2123a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t count,
2133a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint32_t xf,
2143a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint32_t xinc,
2153a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar const *srcn,
2163a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar const *src0,
2173a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar const *src1,
2183a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uchar const *src2,
2193a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t xclip,
2203a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            size_t avail,
2213a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            uint64_t osc_ctl,
2223a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie            int32_t const *yr);
2233a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
2243a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie#if defined(ARCH_ARM_USE_INTRINSICS)
// Builds the four vertical filter coefficients for the accelerated resize
// helpers, in 16.16 fixed point.
//
// yr: receives four coefficients, one per source row tap.
// yf: vertical interpolation fraction.
//
// With x = yf the values are the Catmull-Rom weights used by cubicInterpolate:
//   yr[0] =  (x - 2x^2 + x^3) / 2   (magnitude of the first tap's weight)
//   yr[1] =  (2 - 5x^2 + 3x^3) / 2
//   yr[2] =  (x + 4x^2 - 3x^3) / 2
//   yr[3] =  (x^2 - x^3) / 2        (magnitude of the last tap's weight)
// The two outer weights are negative in the filter but are stored negated
// here; presumably the consumer subtracts those taps -- confirm against the
// helper implementation.
static void mkYCoeff(int32_t *yr, float yf) {
    // Powers of yf scaled to 16.16 and rounded to the nearest integer.
    const int32_t yf1 = rint(yf * 0x10000);
    const int32_t yf2 = rint(yf * yf * 0x10000);
    const int32_t yf3 = rint(yf * yf * yf * 0x10000);

    // Written without a leading unary minus so the shift visibly applies to
    // the whole sum; the integer values are the same as -(...) >> 1.
    yr[0] = (yf1 - 2 * yf2 + yf3) >> 1;
    yr[1] = (3 * yf3 - 5 * yf2 + 0x20000) >> 1;
    yr[2] = (-3 * yf3 + 4 * yf2 + yf1) >> 1;
    yr[3] = (yf2 - yf3) >> 1;
}
2353a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie#endif
236d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
237d75cc84d23f372e55f7173c727129cd5de00748bMiao Wangstatic float4 OneBiCubic(const float4 *yp0, const float4 *yp1, const float4 *yp2, const float4 *yp3,
238d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         float xf, float yf, int width) {
239d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int startx = (int) floor(xf - 1);
240d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    xf = xf - floor(xf);
241d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int maxx = width - 1;
242d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs0 = rsMax(0, startx + 0);
243d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs1 = rsMax(0, startx + 1);
244d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs2 = rsMin(maxx, startx + 2);
245d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs3 = rsMin(maxx, startx + 3);
246d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
247d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float4 p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
248d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp0[xs2], yp0[xs3], xf);
249d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float4 p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
250d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp1[xs2], yp1[xs3], xf);
251d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float4 p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
252d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp2[xs2], yp2[xs3], xf);
253d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float4 p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
254d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp3[xs2], yp3[xs3], xf);
255d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
256d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float4 p  = cubicInterpolate(p0, p1, p2, p3, yf);
257d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    return p;
258d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang}
259d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
260d75cc84d23f372e55f7173c727129cd5de00748bMiao Wangstatic float2 OneBiCubic(const float2 *yp0, const float2 *yp1, const float2 *yp2, const float2 *yp3,
261d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                         float xf, float yf, int width) {
262d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int startx = (int) floor(xf - 1);
263d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    xf = xf - floor(xf);
264d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int maxx = width - 1;
265d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs0 = rsMax(0, startx + 0);
266d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs1 = rsMax(0, startx + 1);
267d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs2 = rsMin(maxx, startx + 2);
268d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs3 = rsMin(maxx, startx + 3);
269d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
270d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float2 p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
271d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp0[xs2], yp0[xs3], xf);
272d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float2 p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
273d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp1[xs2], yp1[xs3], xf);
274d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float2 p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
275d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp2[xs2], yp2[xs3], xf);
276d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float2 p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
277d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                  yp3[xs2], yp3[xs3], xf);
278d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
279d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float2 p  = cubicInterpolate(p0, p1, p2, p3, yf);
280d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    return p;
281d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang}
282d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
283d75cc84d23f372e55f7173c727129cd5de00748bMiao Wangstatic float OneBiCubic(const float *yp0, const float *yp1, const float *yp2, const float *yp3,
284d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                        float xf, float yf, int width) {
285d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int startx = (int) floor(xf - 1);
286d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    xf = xf - floor(xf);
287d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int maxx = width - 1;
288d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs0 = rsMax(0, startx + 0);
289d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs1 = rsMax(0, startx + 1);
290d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs2 = rsMin(maxx, startx + 2);
291d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int xs3 = rsMin(maxx, startx + 3);
292d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
293d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
294d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                 yp0[xs2], yp0[xs3], xf);
295d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
296d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                 yp1[xs2], yp1[xs3], xf);
297d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
298d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                 yp2[xs2], yp2[xs3], xf);
299d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
300d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                 yp3[xs2], yp3[xs3], xf);
301d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
302d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    float p  = cubicInterpolate(p0, p1, p2, p3, yf);
303d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    return p;
304d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang}
305d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
306b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info,
3070d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                                uint32_t xstart, uint32_t xend,
3089ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
309b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
3100d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3110d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    if (!cp->mAlloc.get()) {
3120d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        ALOGE("Resize executed without input, skipping");
3130d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        return;
3140d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
3150d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
3160d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
3170d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
3180d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
3190d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
320b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
321a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang    int starty = (int) floor(yf - 1);
3220d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    yf = yf - floor(yf);
3230d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxy = srcHeight - 1;
3240d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys0 = rsMax(0, starty + 0);
3250d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys1 = rsMax(0, starty + 1);
3260d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys2 = rsMin(maxy, starty + 2);
3270d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys3 = rsMin(maxy, starty + 3);
3280d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3290d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0);
3300d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1);
3310d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
3320d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
3330d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
334b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    uchar4 *out = ((uchar4 *)info->outPtr[0]) + xstart;
3350d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x1 = xstart;
3360d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x2 = xend;
3370d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3383a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie#if defined(ARCH_ARM_USE_INTRINSICS)
3393a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie    if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
3403a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
3413a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        long xf16 = rint(xf * 0x10000);
3423a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        uint32_t xinc16 = rint(cp->scaleX * 0x10000);
3433a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
3443a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int xoff = (xf16 >> 16) - 1;
3453a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int xclip = rsMax(0, xoff) - xoff;
3463a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int len = x2 - x1;
3473a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
3483a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int32_t yr[4];
3493a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
3503a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        mkYCoeff(yr, yf);
3513a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
3523a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        xoff += xclip;
3533a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
3543a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        rsdIntrinsicResizeB4_K(
3553a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                out, len,
3563a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                xf16 & 0xffff, xinc16,
3573a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
3583a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                xclip, srcWidth - xoff + xclip,
3593a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                osc_ctl, yr);
3603a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        out += len;
3613a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        x1 += len;
3623a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie    }
3633a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie#endif
3643a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
3650d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    while(x1 < x2) {
366a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
3670d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
3680d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        out++;
3690d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        x1++;
3700d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
3710d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
3720d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
373b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info,
3740d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                                uint32_t xstart, uint32_t xend,
3759ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
376b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
3770d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3780d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    if (!cp->mAlloc.get()) {
3790d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        ALOGE("Resize executed without input, skipping");
3800d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        return;
3810d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
3820d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
3830d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
3840d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
3850d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
3860d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
387b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
388a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang    int starty = (int) floor(yf - 1);
3890d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    yf = yf - floor(yf);
3900d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxy = srcHeight - 1;
3910d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys0 = rsMax(0, starty + 0);
3920d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys1 = rsMax(0, starty + 1);
3930d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys2 = rsMin(maxy, starty + 2);
3940d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys3 = rsMin(maxy, starty + 3);
3950d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
3960d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0);
3970d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1);
3980d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
3990d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
4000d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
401b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    uchar2 *out = ((uchar2 *)info->outPtr[0]) + xstart;
4020d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x1 = xstart;
4030d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x2 = xend;
4040d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
4053a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie#if defined(ARCH_ARM_USE_INTRINSICS)
4063a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie    if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
4073a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
4083a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        long xf16 = rint(xf * 0x10000);
4093a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        uint32_t xinc16 = rint(cp->scaleX * 0x10000);
4103a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4113a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int xoff = (xf16 >> 16) - 1;
4123a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int xclip = rsMax(0, xoff) - xoff;
4133a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int len = x2 - x1;
4143a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4153a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int32_t yr[4];
4163a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
4173a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        mkYCoeff(yr, yf);
4183a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4193a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        xoff += xclip;
4203a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4213a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        rsdIntrinsicResizeB2_K(
4223a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                out, len,
4233a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                xf16 & 0xffff, xinc16,
4243a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
4253a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                xclip, srcWidth - xoff + xclip,
4263a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                osc_ctl, yr);
4273a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        out += len;
4283a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        x1 += len;
4293a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie    }
4303a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie#endif
4313a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4320d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    while(x1 < x2) {
433a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
4340d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
4350d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        out++;
4360d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        x1++;
4370d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
4380d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
4390d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
440b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info,
4410d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams                                                uint32_t xstart, uint32_t xend,
4429ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
443b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
4440d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
4450d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    if (!cp->mAlloc.get()) {
4460d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        ALOGE("Resize executed without input, skipping");
4470d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        return;
4480d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
4490d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
4500d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
4510d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
4520d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
4530d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
454b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
455a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang    int starty = (int) floor(yf - 1);
4560d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    yf = yf - floor(yf);
4570d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int maxy = srcHeight - 1;
4580d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys0 = rsMax(0, starty + 0);
4590d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys1 = rsMax(0, starty + 1);
4600d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys2 = rsMin(maxy, starty + 2);
4610d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    int ys3 = rsMin(maxy, starty + 3);
4620d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
4630d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *yp0 = pin + stride * ys0;
4640d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *yp1 = pin + stride * ys1;
4650d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *yp2 = pin + stride * ys2;
4660d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uchar *yp3 = pin + stride * ys3;
4670d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
468b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    uchar *out = ((uchar *)info->outPtr[0]) + xstart;
4690d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x1 = xstart;
4700d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    uint32_t x2 = xend;
4710d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
4723a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie#if defined(ARCH_ARM_USE_INTRINSICS)
4733a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie    if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
4743a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
4753a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        long xf16 = rint(xf * 0x10000);
4763a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        uint32_t xinc16 = rint(cp->scaleX * 0x10000);
4773a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4783a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int xoff = (xf16 >> 16) - 1;
4793a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int xclip = rsMax(0, xoff) - xoff;
4803a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int len = x2 - x1;
4813a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4823a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        int32_t yr[4];
4833a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
4843a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        mkYCoeff(yr, yf);
4853a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4863a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        xoff += xclip;
4873a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4883a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        rsdIntrinsicResizeB1_K(
4893a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                out, len,
4903a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                xf16 & 0xffff, xinc16,
4913a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
4923a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                xclip, srcWidth - xoff + xclip,
4933a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie                osc_ctl, yr);
4943a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        out += len;
4953a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie        x1 += len;
4963a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie    }
4973a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie#endif
4983a98366c7f53b20f8550ffac82732d60ece794b4Simon Hosie
4990d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    while(x1 < x2) {
500a2bd5e85ddb7c4cc439f4b4646dafa21558ea5c7Miao Wang        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
5010d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
5020d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        out++;
5030d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        x1++;
5040d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
5050d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
5060d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
507b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info,
508d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                                uint32_t xstart, uint32_t xend,
509d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                                uint32_t outstep) {
510b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
511d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
512d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    if (!cp->mAlloc.get()) {
513d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        ALOGE("Resize executed without input, skipping");
514d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        return;
515d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    }
516d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
517d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
518d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
519d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
520d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
521b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
522d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int starty = (int) floor(yf - 1);
523d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    yf = yf - floor(yf);
524d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int maxy = srcHeight - 1;
525d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys0 = rsMax(0, starty + 0);
526d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys1 = rsMax(0, starty + 1);
527d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys2 = rsMin(maxy, starty + 2);
528d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys3 = rsMin(maxy, starty + 3);
529d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
530d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float4 *yp0 = (const float4 *)(pin + stride * ys0);
531d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float4 *yp1 = (const float4 *)(pin + stride * ys1);
532d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float4 *yp2 = (const float4 *)(pin + stride * ys2);
533d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float4 *yp3 = (const float4 *)(pin + stride * ys3);
534d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
535b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float4 *out = ((float4 *)info->outPtr[0]) + xstart;
536d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    uint32_t x1 = xstart;
537d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    uint32_t x2 = xend;
538d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
539d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    while(x1 < x2) {
540d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
541d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
542d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        out++;
543d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        x1++;
544d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    }
545d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang}
546d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
547b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info,
548d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                                uint32_t xstart, uint32_t xend,
549d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                                uint32_t outstep) {
550b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
551d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
552d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    if (!cp->mAlloc.get()) {
553d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        ALOGE("Resize executed without input, skipping");
554d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        return;
555d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    }
556d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
557d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
558d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
559d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
560d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
561b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
562d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int starty = (int) floor(yf - 1);
563d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    yf = yf - floor(yf);
564d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int maxy = srcHeight - 1;
565d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys0 = rsMax(0, starty + 0);
566d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys1 = rsMax(0, starty + 1);
567d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys2 = rsMin(maxy, starty + 2);
568d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys3 = rsMin(maxy, starty + 3);
569d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
570d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float2 *yp0 = (const float2 *)(pin + stride * ys0);
571d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float2 *yp1 = (const float2 *)(pin + stride * ys1);
572d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float2 *yp2 = (const float2 *)(pin + stride * ys2);
573d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float2 *yp3 = (const float2 *)(pin + stride * ys3);
574d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
575b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float2 *out = ((float2 *)info->outPtr[0]) + xstart;
576d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    uint32_t x1 = xstart;
577d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    uint32_t x2 = xend;
578d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
579d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    while(x1 < x2) {
580d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
581d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
582d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        out++;
583d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        x1++;
584d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    }
585d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang}
586d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
587b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info,
588d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                                uint32_t xstart, uint32_t xend,
589d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang                                                uint32_t outstep) {
590b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
591d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
592d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    if (!cp->mAlloc.get()) {
593d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        ALOGE("Resize executed without input, skipping");
594d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        return;
595d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    }
596d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
597d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
598d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
599d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
600d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
601b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
602d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int starty = (int) floor(yf - 1);
603d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    yf = yf - floor(yf);
604d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int maxy = srcHeight - 1;
605d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys0 = rsMax(0, starty + 0);
606d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys1 = rsMax(0, starty + 1);
607d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys2 = rsMin(maxy, starty + 2);
608d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    int ys3 = rsMin(maxy, starty + 3);
609d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
610d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float *yp0 = (const float *)(pin + stride * ys0);
611d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float *yp1 = (const float *)(pin + stride * ys1);
612d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float *yp2 = (const float *)(pin + stride * ys2);
613d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    const float *yp3 = (const float *)(pin + stride * ys3);
614d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
615b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    float *out = ((float *)info->outPtr[0]) + xstart;
616d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    uint32_t x1 = xstart;
617d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    uint32_t x2 = xend;
618d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
619d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    while(x1 < x2) {
620d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
621d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
622d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        out++;
623d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        x1++;
624d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    }
625d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang}
626d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang
6270d6043caef208ee6c661eb17bcb376abfe90cd48Jason SamsRsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize (
6280d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams            RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
6290d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) {
6300d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
6310d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
6320d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
6330d6043caef208ee6c661eb17bcb376abfe90cd48Jason SamsRsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
6340d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
6350d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
636f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailesvoid RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot,
637f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                            const Allocation ** ains,
638f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                            uint32_t inLen, Allocation * aout,
639f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                            const void * usr, uint32_t usrLen,
640f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                            const RsScriptCall *sc)
6410d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams{
6420d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    if (!mAlloc.get()) {
6430d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        ALOGE("Resize executed without input, skipping");
6440d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams        return;
6450d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
6460d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY;
6470d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX;
6480d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
649d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    //check the data type to determine F or U.
650d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    if (mAlloc->getType()->getElement()->getType() == RS_TYPE_UNSIGNED_8) {
651d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        switch(mAlloc->getType()->getElement()->getVectorSize()) {
652d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 1:
653d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            mRootPtr = &kernelU1;
654d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            break;
655d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 2:
656d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            mRootPtr = &kernelU2;
657d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            break;
658d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 3:
659d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 4:
660d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            mRootPtr = &kernelU4;
661d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            break;
662d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        }
663d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang    } else {
664d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        switch(mAlloc->getType()->getElement()->getVectorSize()) {
665d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 1:
666d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            mRootPtr = &kernelF1;
667d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            break;
668d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 2:
669d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            mRootPtr = &kernelF2;
670d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            break;
671d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 3:
672d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        case 4:
673d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            mRootPtr = &kernelF4;
674d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang            break;
675d75cc84d23f372e55f7173c727129cd5de00748bMiao Wang        }
6760d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    }
6770d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
6780d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX;
6790d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY;
6800d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
6810d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
6820d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
6830d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsvoid RsdCpuScriptIntrinsicResize::populateScript(Script *s) {
6840d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    s->mHal.info.exportedVariableCount = 1;
6850d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
6860d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
6870d6043caef208ee6c661eb17bcb376abfe90cd48Jason Samsvoid RsdCpuScriptIntrinsicResize::invokeFreeChildren() {
6880d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    mAlloc.clear();
6890d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
6900d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
6910d6043caef208ee6c661eb17bcb376abfe90cd48Jason SamsRsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
6920d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams
6930d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams    return new RsdCpuScriptIntrinsicResize(ctx, s, e);
6940d6043caef208ee6c661eb17bcb376abfe90cd48Jason Sams}
695462de21ac2e1773b99aedee012adb374e476ae36Chih-Hung Hsieh
696462de21ac2e1773b99aedee012adb374e476ae36Chih-Hung Hsieh} // namespace renderscript
697462de21ac2e1773b99aedee012adb374e476ae36Chih-Hung Hsieh} // namespace android
698