/*
 * Copyright © 2011 SCore Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
 * Author:  Taekyun Kim (tkq.kim@samsung.com)
 */
261176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * This file contains scaled bilinear scanline functions implemented
 * using Siarhei's older bilinear macro template.
 *
 * << General scanline function procedures >>
 *  1. bilinear interpolate source pixels
 *  2. load mask pixels
 *  3. load destination pixels
 *  4. duplicate mask to fill whole register
 *  5. interleave source & destination pixels
 *  6. apply mask to source pixels
 *  7. combine source & destination pixels
 *  8. deinterleave final result
 *  9. store destination pixels
 *
 * All registers with a single number (e.g. src0, tmp0) are 64-bit registers.
 * Registers with double numbers (e.g. src01, dst01) are 128-bit registers.
 * All temp registers can be used freely outside the code block.
 * The symbols (register .req aliases) OUT and MASK are assumed to be defined
 * by the caller of these macro blocks.
 *
 * Remarks
 *  There can be lots of pipeline stalls inside a code block and between
 *  code blocks. Further optimizations will be done by new macro templates
 *  using the head/tail_head/tail scheme.
 */
511176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Prevent the stack from becoming executable for no reason: on Linux/ELF
 * builds an empty .note.GNU-stack section is emitted.
 */
#if defined(__linux__) && defined (__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

/*
 * Assembler setup: code goes to .text, ARM (not Thumb) encoding, NEON
 * instructions enabled, 4-byte alignment.
 *
 * .altmacro is required by the macros in this file: they reference their
 * parameters without a backslash prefix and paste names together with '&'.
 *
 * .object_arch and the two .eabi_attribute lines override what is recorded
 * in the object file attributes (tags 10 and 12 set to 0).
 * NOTE(review): presumably done so the object can be linked into builds
 * that are not NEON-only and selected at run time - confirm.
 */
.text
.fpu neon
.arch armv7a
.object_arch armv4
.eabi_attribute 10, 0
.eabi_attribute 12, 0
.arm
.altmacro
.p2align 2

#include "pixman-private.h"
#include "pixman-arm-neon-asm.h"
691176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Bilinear macros taken from pixman-arm-neon-asm.S
 */
731176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Supplementary macro for setting function attributes.
 *
 *   fname - symbol name of the function being defined
 *
 * Opens a .func region for fname, exports the symbol with .global and
 * places the fname label at the current location. On ELF targets the
 * symbol is additionally marked hidden and typed as a function.
 * No .endfunc is emitted here; closing the region is left to the user
 * of this macro.
 */
.macro pixman_asm_function fname
    .func fname
    .global fname
#ifdef __ELF__
    .hidden fname
    .type fname, %function
#endif
fname:
.endm
841176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Bilinear scaling support code. It provides pixel fetching, color format
 * conversion and interpolation as separate macros, which serve as the
 * basic building blocks for constructing bilinear scanline functions.
 */
901176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * bilinear_load_8888 reg1, reg2, tmp
 *
 * Load source data from two rows of a 32 bits-per-pixel image for one
 * sample position.
 *   reg1 - receives the data at address TOP + (X >> 16) * 4
 *   reg2 - receives the data STRIDE bytes after that address
 *   tmp  - not used by this macro
 *
 * Side effects: X is advanced by UX, TMP1 is clobbered.
 * X, UX, TOP, STRIDE and TMP1 are not defined in this part of the file;
 * they are expected to be register aliases provided by the code that
 * instantiates the macro.
 */
.macro bilinear_load_8888 reg1, reg2, tmp
    mov       TMP1, X, asr #16
    add       X, X, UX
    add       TMP1, TOP, TMP1, asl #2
    vld1.32   {reg1}, [TMP1], STRIDE
    vld1.32   {reg2}, [TMP1]
.endm
981176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * bilinear_load_0565 reg1, reg2, tmp
 *
 * Load source data from two rows of a 16 bits-per-pixel image for one
 * sample position and convert it with convert_four_0565_to_x888_packed.
 *
 * 32 bits from address TOP + (X >> 16) * 2 go to lane 0 of reg2, and
 * 32 bits from STRIDE bytes further on go to lane 1 of reg2. The
 * conversion macro (defined outside this file section) is then invoked
 * with reg2 as input and reg1, reg2, tmp as its remaining operands.
 *
 * Side effects: X is advanced by UX, TMP1 is clobbered.
 */
.macro bilinear_load_0565 reg1, reg2, tmp
    mov       TMP1, X, asr #16
    add       X, X, UX
    add       TMP1, TOP, TMP1, asl #1
    vld1.32   {reg2[0]}, [TMP1], STRIDE
    vld1.32   {reg2[1]}, [TMP1]
    convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
.endm
1071176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * bilinear_load_and_vertical_interpolate_two_8888
 *
 * Runs bilinear_load_8888 twice (two consecutive sample positions) and
 * blends the two rows of each load with widening 8-bit multiplies:
 *   acc1 = reg1 * d28 + reg2 * d29
 *   acc2 = reg3 * d28 + reg4 * d29
 * d28 and d29 must already hold the per-row weights.
 * NOTE(review): presumably d28 is the top-row and d29 the bottom-row
 * weight - confirm against the function that sets them up.
 *
 * tmp1 and tmp2 are only forwarded to bilinear_load_8888.
 * X is advanced by 2 * UX and TMP1 is clobbered by the loads.
 */
.macro bilinear_load_and_vertical_interpolate_two_8888 \
                    acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2

    bilinear_load_8888 reg1, reg2, tmp1
    vmull.u8  acc1, reg1, d28
    vmlal.u8  acc1, reg2, d29
    bilinear_load_8888 reg3, reg4, tmp2
    vmull.u8  acc2, reg3, d28
    vmlal.u8  acc2, reg4, d29
.endm
1181176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * bilinear_load_and_vertical_interpolate_four_8888
 *
 * Four-sample variant: invokes the two-sample 8888 macro once with the
 * "x" register set and once with the "y" register set. The results are
 * left in xacc1, xacc2, yacc1 and yacc2.
 */
.macro bilinear_load_and_vertical_interpolate_four_8888 \
                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi

    bilinear_load_and_vertical_interpolate_two_8888 \
                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
    bilinear_load_and_vertical_interpolate_two_8888 \
                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
.endm
1281176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * bilinear_load_and_vertical_interpolate_two_0565
 *
 * Two-sample load and vertical blend for a 16 bits-per-pixel source.
 *
 * Two addresses are computed from X (TMP1 for the first sample, TMP2 for
 * the second, X advanced by UX after each). For every address 32 bits
 * are loaded from the first row into lane 0 and from STRIDE bytes later
 * into lane 1 of acc2lo / acc2hi. acc2 is then expanded by
 * convert_0565_to_x888 into reg3, reg2, reg1, and four vzip.u8 steps
 * rearrange the channel data back into per-pixel byte order.
 * Finally:
 *   acc1 = reg1 * d28 + reg2 * d29
 *   acc2 = reg3 * d28 + reg4 * d29
 *
 * acc2lo and acc2hi are expected to be the two halves of acc2.
 * NOTE(review): reg4 is zipped in without being written by any
 * instruction visible here - presumably it only feeds the unused "x"
 * channel; confirm.
 *
 * Clobbers TMP1 and TMP2.
 */
.macro bilinear_load_and_vertical_interpolate_two_0565 \
                acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi

    mov       TMP1, X, asr #16
    add       X, X, UX
    add       TMP1, TOP, TMP1, asl #1
    mov       TMP2, X, asr #16
    add       X, X, UX
    add       TMP2, TOP, TMP2, asl #1
    vld1.32   {acc2lo[0]}, [TMP1], STRIDE
    vld1.32   {acc2hi[0]}, [TMP2], STRIDE
    vld1.32   {acc2lo[1]}, [TMP1]
    vld1.32   {acc2hi[1]}, [TMP2]
    convert_0565_to_x888 acc2, reg3, reg2, reg1
    vzip.u8   reg1, reg3
    vzip.u8   reg2, reg4
    vzip.u8   reg3, reg4
    vzip.u8   reg1, reg2
    vmull.u8  acc1, reg1, d28
    vmlal.u8  acc1, reg2, d29
    vmull.u8  acc2, reg3, d28
    vmlal.u8  acc2, reg4, d29
.endm
1521176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * bilinear_load_and_vertical_interpolate_four_0565
 *
 * Four-sample load and vertical blend for a 16 bits-per-pixel source.
 * It performs the same steps as the two-sample 0565 macro for an "x"
 * and a "y" register set, but the instructions of the two halves are
 * interleaved: the loads and conversion of the "y" pair are mixed with
 * the zips and multiplies of the "x" pair.
 *
 * Results:
 *   xacc1 = xreg1 * d28 + xreg2 * d29
 *   xacc2 = xreg3 * d28 + xreg4 * d29
 *   yacc1 = yreg1 * d28 + yreg2 * d29
 *   yacc2 = yreg3 * d28 + yreg4 * d29
 *
 * X is advanced by 4 * UX; TMP1 and TMP2 are clobbered.
 * The instruction order is kept exactly as written; do not reorder
 * without re-checking register dependencies between the two halves.
 */
.macro bilinear_load_and_vertical_interpolate_four_0565 \
                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi

    /* addresses and loads for the first pair ("x") */
    mov       TMP1, X, asr #16
    add       X, X, UX
    add       TMP1, TOP, TMP1, asl #1
    mov       TMP2, X, asr #16
    add       X, X, UX
    add       TMP2, TOP, TMP2, asl #1
    vld1.32   {xacc2lo[0]}, [TMP1], STRIDE
    vld1.32   {xacc2hi[0]}, [TMP2], STRIDE
    vld1.32   {xacc2lo[1]}, [TMP1]
    vld1.32   {xacc2hi[1]}, [TMP2]
    convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
    /* addresses for the second pair ("y") */
    mov       TMP1, X, asr #16
    add       X, X, UX
    add       TMP1, TOP, TMP1, asl #1
    mov       TMP2, X, asr #16
    add       X, X, UX
    add       TMP2, TOP, TMP2, asl #1
    /* "y" loads interleaved with the "x" zips */
    vld1.32   {yacc2lo[0]}, [TMP1], STRIDE
    vzip.u8   xreg1, xreg3
    vld1.32   {yacc2hi[0]}, [TMP2], STRIDE
    vzip.u8   xreg2, xreg4
    vld1.32   {yacc2lo[1]}, [TMP1]
    vzip.u8   xreg3, xreg4
    vld1.32   {yacc2hi[1]}, [TMP2]
    vzip.u8   xreg1, xreg2
    convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
    /* "x" vertical blend interleaved with the "y" zips */
    vmull.u8  xacc1, xreg1, d28
    vzip.u8   yreg1, yreg3
    vmlal.u8  xacc1, xreg2, d29
    vzip.u8   yreg2, yreg4
    vmull.u8  xacc2, xreg3, d28
    vzip.u8   yreg3, yreg4
    vmlal.u8  xacc2, xreg4, d29
    vzip.u8   yreg1, yreg2
    /* "y" vertical blend */
    vmull.u8  yacc1, yreg1, d28
    vmlal.u8  yacc1, yreg2, d29
    vmull.u8  yacc2, yreg3, d28
    vmlal.u8  yacc2, yreg4, d29
.endm
1961176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * bilinear_store_8888 numpix, tmp1, tmp2
 *
 * Store the result pixels held in d0/d1 to [OUT] and advance OUT.
 *   numpix == 4 : d0 and d1 (16 bytes)
 *   numpix == 2 : d0 (8 bytes)
 *   numpix == 1 : lane 0 of d0 (4 bytes, address marked 32-bit aligned)
 * Any other numpix is rejected at assembly time.
 * tmp1 and tmp2 are not used by this macro.
 *
 * The .error message is quoted because GNU as only accepts a string
 * argument there; without quotes the intended text is not reported.
 */
.macro bilinear_store_8888 numpix, tmp1, tmp2
.if numpix == 4
    vst1.32   {d0, d1}, [OUT]!
.elseif numpix == 2
    vst1.32   {d0}, [OUT]!
.elseif numpix == 1
    vst1.32   {d0[0]}, [OUT, :32]!
.else
    .error "bilinear_store_8888 numpix is unsupported"
.endif
.endm
2081176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * bilinear_store_0565 numpix, tmp1, tmp2
 *
 * Convert the result pixels held in d0-d3 to 16 bits per pixel and store
 * them to [OUT], advancing OUT.
 *
 * Four vuzp.u8 steps first regroup the bytes of d0-d3, then
 * convert_8888_to_0565 (defined outside this file section) is invoked
 * with d2, d1, d0 as inputs, q1 as output and tmp1, tmp2 as scratch.
 * The low half of q1, d2, is what gets stored:
 *   numpix == 4 : all of d2 (8 bytes)
 *   numpix == 2 : 32-bit lane 0 of d2 (4 bytes)
 *   numpix == 1 : 16-bit lane 0 of d2 (2 bytes)
 * Any other numpix is rejected at assembly time.
 *
 * The .error message is quoted because GNU as only accepts a string
 * argument there; without quotes the intended text is not reported.
 */
.macro bilinear_store_0565 numpix, tmp1, tmp2
    vuzp.u8 d0, d1
    vuzp.u8 d2, d3
    vuzp.u8 d1, d3
    vuzp.u8 d0, d2
    convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2
.if numpix == 4
    vst1.16   {d2}, [OUT]!
.elseif numpix == 2
    vst1.32   {d2[0]}, [OUT]!
.elseif numpix == 1
    vst1.16   {d2[0]}, [OUT]!
.else
    .error "bilinear_store_0565 numpix is unsupported"
.endif
.endm
2251176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
2261176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Macros for loading mask pixels into register 'mask'.
 * The vdup (duplication across the register) must be done somewhere else.
 */
/* Mask format "x" (no mask): nothing to load, expands to no code. */
.macro bilinear_load_mask_x numpix, mask
.endm
2331176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * bilinear_load_mask_8 numpix, mask
 *
 * Load numpix mask bytes from [MASK] into lane 0 of 'mask' and advance
 * MASK past them:
 *   numpix == 4 : one 32-bit lane (4 bytes)
 *   numpix == 2 : one 16-bit lane (2 bytes)
 *   numpix == 1 : one 8-bit lane  (1 byte)
 * Any other numpix is rejected at assembly time.
 * Afterwards a prefetch is issued prefetch_offset bytes ahead of MASK
 * (prefetch_offset is defined outside this file section).
 *
 * The .error message is quoted because GNU as only accepts a string
 * argument there; without quotes the intended text is not reported.
 */
.macro bilinear_load_mask_8 numpix, mask
.if numpix == 4
    vld1.32     {mask[0]}, [MASK]!
.elseif numpix == 2
    vld1.16     {mask[0]}, [MASK]!
.elseif numpix == 1
    vld1.8      {mask[0]}, [MASK]!
.else
    .error "bilinear_load_mask_8 numpix is unsupported"
.endif
    pld         [MASK, #prefetch_offset]
.endm
2461176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Dispatcher: pastes mask_fmt onto the macro name and invokes
 * bilinear_load_mask_<mask_fmt> with the remaining arguments.
 */
.macro bilinear_load_mask mask_fmt, numpix, mask
    bilinear_load_mask_&mask_fmt numpix, mask
.endm
2501176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
2511176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Macros for loading destination pixels into registers 'dst0' and 'dst1'.
 * Interleaving should be done somewhere else.
 */
/* 0565 destination, operator "src": expands to no code (no load). */
.macro bilinear_load_dst_0565_src numpix, dst0, dst1, dst01
.endm
2581176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/* 8888 destination, operator "src": expands to no code (no load). */
.macro bilinear_load_dst_8888_src numpix, dst0, dst1, dst01
.endm
2611176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * bilinear_load_dst_8888 numpix, dst0, dst1, dst01
 *
 * Load numpix 32-bit destination pixels from [OUT] without advancing OUT:
 *   numpix == 4 : dst0 and dst1
 *   numpix == 2 : dst0
 *   numpix == 1 : lane 0 of dst0
 * Any other numpix is rejected at assembly time.
 * Afterwards a prefetch is issued prefetch_offset * 4 bytes ahead of OUT.
 * dst01 is not used by this macro.
 *
 * The .error message is quoted because GNU as only accepts a string
 * argument there; without quotes the intended text is not reported.
 */
.macro bilinear_load_dst_8888 numpix, dst0, dst1, dst01
.if numpix == 4
    vld1.32     {dst0, dst1}, [OUT]
.elseif numpix == 2
    vld1.32     {dst0}, [OUT]
.elseif numpix == 1
    vld1.32     {dst0[0]}, [OUT]
.else
    .error "bilinear_load_dst_8888 numpix is unsupported"
.endif
    pld         [OUT, #(prefetch_offset * 4)]
.endm
2741176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/* 8888 destination, operator "over": forwards to bilinear_load_dst_8888. */
.macro bilinear_load_dst_8888_over numpix, dst0, dst1, dst01
    bilinear_load_dst_8888 numpix, dst0, dst1, dst01
.endm
2781176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/* 8888 destination, operator "add": forwards to bilinear_load_dst_8888. */
.macro bilinear_load_dst_8888_add numpix, dst0, dst1, dst01
    bilinear_load_dst_8888 numpix, dst0, dst1, dst01
.endm
2821176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Dispatcher: pastes dst_fmt and op onto the macro name and invokes
 * bilinear_load_dst_<dst_fmt>_<op> with the remaining arguments.
 */
.macro bilinear_load_dst dst_fmt, op, numpix, dst0, dst1, dst01
    bilinear_load_dst_&dst_fmt&_&op numpix, dst0, dst1, dst01
.endm
2861176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Macros for duplicating a partially loaded mask to fill the entire register.
 * The mask is applied to interleaved source pixels, that is
 *  (r0, r1, r2, r3, g0, g1, g2, g3) x (m0, m1, m2, m3, m0, m1, m2, m3)
 *  (b0, b1, b2, b3, a0, a1, a2, a3) x (m0, m1, m2, m3, m0, m1, m2, m3)
 * so the loaded mask has to be duplicated across the whole register.
 *
 * For the two-pixel case
 *  (r0, r1, x, x, g0, g1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1)
 *  (b0, b1, x, x, a0, a1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1)
 * Some optimizations are possible here, including for the last-pixel case.
 */
/* Mask format "x" (no mask): nothing to duplicate, expands to no code. */
.macro bilinear_duplicate_mask_x numpix, mask
.endm
3011176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * bilinear_duplicate_mask_8 numpix, mask
 *
 * Replicate lane 0 of 'mask' across the whole register. The lane width
 * matches the amount of mask data held for numpix pixels:
 *   numpix == 4 : 32-bit lane
 *   numpix == 2 : 16-bit lane
 *   numpix == 1 : 8-bit lane
 * Any other numpix is rejected at assembly time.
 *
 * The .error message is quoted because GNU as only accepts a string
 * argument there, and it now names numpix like the sibling macros do.
 */
.macro bilinear_duplicate_mask_8 numpix, mask
.if numpix == 4
    vdup.32     mask, mask[0]
.elseif numpix == 2
    vdup.16     mask, mask[0]
.elseif numpix == 1
    vdup.8      mask, mask[0]
.else
    .error "bilinear_duplicate_mask_8 numpix is unsupported"
.endif
.endm
3131176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Dispatcher: pastes mask_fmt onto the macro name and invokes
 * bilinear_duplicate_mask_<mask_fmt> with the remaining arguments.
 */
.macro bilinear_duplicate_mask mask_fmt, numpix, mask
    bilinear_duplicate_mask_&mask_fmt numpix, mask
.endm
3171176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Macros for interleaving src and dst pixels to rrrr gggg bbbb aaaa form.
 * Interleaving should be done when a mask is enabled or the operator is 'over'.
 */
/*
 * bilinear_interleave src0, src1, dst0, dst1
 *
 * Apply two rounds of byte-wise vuzp to the (src0, src1) pair and to the
 * (dst0, dst1) pair. For four packed 4-byte pixels per pair this leaves
 * byte 0 of every pixel followed by byte 1 of every pixel in the first
 * register, and bytes 2 and bytes 3 in the second register.
 * The src and dst operations are independent and are alternated.
 */
.macro bilinear_interleave src0, src1, dst0, dst1
    vuzp.8      src0, src1
    vuzp.8      dst0, dst1
    vuzp.8      src0, src1
    vuzp.8      dst0, dst1
.endm
3281176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/* No mask, operator "src": expands to no code (pixels stay packed). */
.macro bilinear_interleave_src_dst_x_src \
                numpix, src0, src1, src01, dst0, dst1, dst01
.endm
3321176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * No mask, operator "over": interleave src and dst via
 * bilinear_interleave. numpix, src01 and dst01 are not used.
 */
.macro bilinear_interleave_src_dst_x_over \
                numpix, src0, src1, src01, dst0, dst1, dst01

    bilinear_interleave src0, src1, dst0, dst1
.endm
3381176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/* No mask, operator "add": expands to no code (pixels stay packed). */
.macro bilinear_interleave_src_dst_x_add \
                numpix, src0, src1, src01, dst0, dst1, dst01
.endm
3421176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * 8-bit mask, operator "src": interleave src and dst via
 * bilinear_interleave. numpix, src01 and dst01 are not used.
 */
.macro bilinear_interleave_src_dst_8_src \
                numpix, src0, src1, src01, dst0, dst1, dst01

    bilinear_interleave src0, src1, dst0, dst1
.endm
3481176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * 8-bit mask, operator "over": interleave src and dst via
 * bilinear_interleave. numpix, src01 and dst01 are not used.
 */
.macro bilinear_interleave_src_dst_8_over \
                numpix, src0, src1, src01, dst0, dst1, dst01

    bilinear_interleave src0, src1, dst0, dst1
.endm
3541176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * 8-bit mask, operator "add": interleave src and dst via
 * bilinear_interleave. numpix, src01 and dst01 are not used.
 */
.macro bilinear_interleave_src_dst_8_add \
                numpix, src0, src1, src01, dst0, dst1, dst01

    bilinear_interleave src0, src1, dst0, dst1
.endm
3601176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Dispatcher: pastes mask_fmt and op onto the macro name and invokes
 * bilinear_interleave_src_dst_<mask_fmt>_<op> with the remaining
 * arguments.
 */
.macro bilinear_interleave_src_dst \
                mask_fmt, op, numpix, src0, src1, src01, dst0, dst1, dst01

    bilinear_interleave_src_dst_&mask_fmt&_&op \
                numpix, src0, src1, src01, dst0, dst1, dst01
.endm
3671176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
3681176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Macros for applying the mask to src pixels (see the combine_mask_u() function).
 * src and dst should be in interleaved form.
 * The mask register should be in the form (m0, m1, m2, m3).
 */
/* Mask format "x" (no mask): source is left untouched, expands to no code. */
.macro bilinear_apply_mask_to_src_x \
                numpix, src0, src1, src01, mask, \
                tmp01, tmp23, tmp45, tmp67
.endm
3781176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * bilinear_apply_mask_to_src_8
 *
 * Multiply every byte of src0 and src1 by the corresponding byte of
 * 'mask' and scale the product back to 8 bits. Per byte lane, with
 * t = src * mask held in 16 bits:
 *   src = (t + ((t + 128) >> 8) + 128) >> 8
 * which is a rounding approximation of t / 255.
 *
 *   src0, src1  - in/out, 64-bit source registers
 *   mask        - 64-bit mask register, already duplicated across lanes
 *   tmp01-tmp67 - 128-bit scratch registers, clobbered
 * numpix and src01 are not used by this macro.
 */
.macro bilinear_apply_mask_to_src_8 \
                numpix, src0, src1, src01, mask, \
                tmp01, tmp23, tmp45, tmp67

    vmull.u8        tmp01, src0, mask
    vmull.u8        tmp23, src1, mask
    /* bubbles */
    vrshr.u16       tmp45, tmp01, #8
    vrshr.u16       tmp67, tmp23, #8
    /* bubbles */
    vraddhn.u16     src0, tmp45, tmp01
    vraddhn.u16     src1, tmp67, tmp23
.endm
3921176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Dispatcher: pastes mask_fmt onto the macro name and forwards all other
 * arguments unchanged to bilinear_apply_mask_to_src_<mask_fmt>.
 * The variants visible in this file section are 'x' and '8'.
 */
3931176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_apply_mask_to_src \
3941176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                mask_fmt, numpix, src0, src1, src01, mask, \
3951176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                tmp01, tmp23, tmp45, tmp67
3961176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
3971176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_apply_mask_to_src_&mask_fmt \
3981176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                numpix, src0, src1, src01, mask, \
3991176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                tmp01, tmp23, tmp45, tmp67
4001176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
4011176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
4021176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
4031176bdada62cabc6ec4b0308a930e83b679d5d36John Reck/*
4041176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * Macros for combining src and destination pixels.
4051176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * Whether the pixels are interleaved depends on the operator 'op'.
4061176bdada62cabc6ec4b0308a930e83b679d5d36John Reck */
/*
 * Combiner selected by bilinear_combine when op is 'src'.
 * The body is empty: the source registers are left as they are and the
 * destination registers are neither read nor written.
 */
4071176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_combine_src \
4081176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                numpix, src0, src1, src01, dst0, dst1, dst01, \
4091176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                tmp01, tmp23, tmp45, tmp67, tmp8
4101176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
4111176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Combiner selected by bilinear_combine when op is 'over'.
 *   tmp8      = ~(32-bit lane 1 of src1 replicated to both lanes)
 *   dst0/dst1 = dst * tmp8 per byte, reduced back to 8 bits as
 *               (t + ((t + 0x80) >> 8) + 0x80) >> 8
 *   src01     = saturating byte-wise add of dst01 and src01
 * The result is left in src01; dst0, dst1, tmp01, tmp23, tmp45, tmp67 and
 * tmp8 are clobbered. numpix is not used.
 * NOTE(review): src1[1] is presumably where the interleaved layout keeps the
 * source alpha bytes - confirm against bilinear_interleave_src_dst.
 */
4121176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_combine_over \
4131176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                numpix, src0, src1, src01, dst0, dst1, dst01, \
4141176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                tmp01, tmp23, tmp45, tmp67, tmp8
4151176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
4161176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vdup.32     tmp8, src1[1]
4171176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    /* bubbles */
    /* bitwise complement, i.e. 255 - value for every byte */
4181176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmvn.8      tmp8, tmp8
4191176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    /* bubbles */
4201176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmull.u8    tmp01, dst0, tmp8
4211176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    /* bubbles */
4221176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmull.u8    tmp23, dst1, tmp8
4231176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    /* bubbles */
4241176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vrshr.u16   tmp45, tmp01, #8
4251176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vrshr.u16   tmp67, tmp23, #8
4261176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    /* bubbles */
4271176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vraddhn.u16 dst0, tmp45, tmp01
4281176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vraddhn.u16 dst1, tmp67, tmp23
4291176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    /* bubbles */
    /* src01 = min(255, scaled dst + src) per byte */
4301176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vqadd.u8    src01, dst01, src01
4311176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
4321176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Combiner selected by bilinear_combine when op is 'add'.
 * src01 = saturating byte-wise add of dst01 and src01.
 * Only src01 and dst01 are used; every other argument is ignored.
 */
4331176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_combine_add \
4341176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                numpix, src0, src1, src01, dst0, dst1, dst01, \
4351176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                tmp01, tmp23, tmp45, tmp67, tmp8
4361176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
4371176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vqadd.u8    src01, dst01, src01
4381176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
4391176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Dispatcher: pastes op onto the macro name and forwards all other
 * arguments unchanged to bilinear_combine_<op>.
 * The variants visible in this file section are 'src', 'over' and 'add'.
 */
4401176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_combine \
4411176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                op, numpix, src0, src1, src01, dst0, dst1, dst01, \
4421176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                tmp01, tmp23, tmp45, tmp67, tmp8
4431176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
4441176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_combine_&op \
4451176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                numpix, src0, src1, src01, dst0, dst1, dst01, \
4461176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                tmp01, tmp23, tmp45, tmp67, tmp8
4471176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
4481176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
4491176bdada62cabc6ec4b0308a930e83b679d5d36John Reck/*
4501176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * Macros for final deinterleaving of destination pixels if needed.
4511176bdada62cabc6ec4b0308a930e83b679d5d36John Reck */
/*
 * Applies a byte-wise unzip (vuzp.8) to the dst0/dst1 register pair twice,
 * in place. numpix and dst01 are accepted but not used.
 * NOTE(review): presumably this reverses the byte shuffle done by the
 * interleave macros defined earlier in the file - confirm there.
 */
4521176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_deinterleave numpix, dst0, dst1, dst01
4531176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vuzp.8      dst0, dst1
4541176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    /* bubbles */
4551176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vuzp.8      dst0, dst1
4561176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
4571176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/* mask_fmt 'x', op 'src': empty body, the registers are left as they are. */
4581176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_deinterleave_dst_x_src numpix, dst0, dst1, dst01
4591176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
4601176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/* mask_fmt 'x', op 'over': forwards to bilinear_deinterleave. */
4611176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_deinterleave_dst_x_over numpix, dst0, dst1, dst01
4621176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_deinterleave numpix, dst0, dst1, dst01
4631176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
4641176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/* mask_fmt 'x', op 'add': empty body, the registers are left as they are. */
4651176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_deinterleave_dst_x_add numpix, dst0, dst1, dst01
4661176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
4671176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/* mask_fmt '8', op 'src': forwards to bilinear_deinterleave. */
4681176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_deinterleave_dst_8_src numpix, dst0, dst1, dst01
4691176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_deinterleave numpix, dst0, dst1, dst01
4701176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
4711176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/* mask_fmt '8', op 'over': forwards to bilinear_deinterleave. */
4721176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_deinterleave_dst_8_over numpix, dst0, dst1, dst01
4731176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_deinterleave numpix, dst0, dst1, dst01
4741176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
4751176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/* mask_fmt '8', op 'add': forwards to bilinear_deinterleave. */
4761176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_deinterleave_dst_8_add numpix, dst0, dst1, dst01
4771176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_deinterleave numpix, dst0, dst1, dst01
4781176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
4791176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Dispatcher: pastes mask_fmt and op onto the macro name and forwards the
 * remaining arguments to bilinear_deinterleave_dst_<mask_fmt>_<op>.
 */
4801176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_deinterleave_dst mask_fmt, op, numpix, dst0, dst1, dst01
4811176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_deinterleave_dst_&mask_fmt&_&op numpix, dst0, dst1, dst01
4821176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
4831176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
4841176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Interpolates, masks, combines and stores a single pixel.
 *
 * Register inputs prepared by generate_bilinear_scanline_func:
 *   d28, d29 - WT and WB replicated to every byte (vertical weights)
 *   d30      - low half of q15 (horizontal weight)
 * Unlike the two- and four-pixel macros this one does not recompute q15 or
 * advance q12.
 *
 * The load, mask, interleave and store helpers are defined outside this
 * section; the comments about what they leave in registers are assumptions
 * to be confirmed there.
 */
4851176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_interpolate_last_pixel src_fmt, mask_fmt, dst_fmt, op
    /* presumably d0 = top pixel pair, d1 = bottom pixel pair - TODO confirm */
4861176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_load_&src_fmt d0, d1, d2
4871176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_load_mask mask_fmt, 1, d4
4881176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_load_dst dst_fmt, op, 1, d18, d19, q9
    /* vertical pass: q1 = d0 * d28 + d1 * d29 (16-bit lanes) */
4891176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmull.u8  q1, d0, d28
4901176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmlal.u8  q1, d1, d29
4911176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    /* 5 cycles bubble */
    /* horizontal pass: q0 = d2 * (2^BITS - d30) + d3 * d30 (32-bit lanes) */
4921176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS
4931176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmlsl.u16 q0, d2, d30
4941176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmlal.u16 q0, d3, d30
4951176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    /* 5 cycles bubble */
4961176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_duplicate_mask mask_fmt, 1, d4
    /* drop the 2 * BITS fractional bits, narrowing 32 -> 16 bits */
4971176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
4981176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    /* 3 cycles bubble */
    /* narrow 16 -> 8 bits per channel */
4991176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmovn.u16 d0, q0
5001176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    /* 1 cycle bubble */
    /* source in d0/d1 (q0), destination in d18/d19 (q9), mask in d4 */
5011176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_interleave_src_dst \
5021176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                mask_fmt, op, 1, d0, d1, q0, d18, d19, q9
5031176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_apply_mask_to_src \
5041176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                mask_fmt, 1, d0, d1, q0, d4, \
5051176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                q3, q8, q10, q11
5061176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_combine \
5071176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                op, 1, d0, d1, q0, d18, d19, q9, \
5081176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                q3, q8, q10, q11, d5
5091176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_deinterleave_dst mask_fmt, op, 1, d0, d1, q0
5101176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_store_&dst_fmt 1, q2, q3
5111176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
5121176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Interpolates, masks, combines and stores two pixels.
 *
 * On entry d30/d31 (the halves of q15) are used as the horizontal weights
 * of the two pixels. Before returning, q15 is recomputed from q12 and q12 is
 * advanced by q13, so the weights are ready for the next two pixels.
 *
 * The load, mask, interleave and store helpers are defined outside this
 * section.
 */
5131176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_interpolate_two_pixels src_fmt, mask_fmt, dst_fmt, op
    /* presumably leaves the vertically blended pixels in q1 and q11 - TODO confirm */
5141176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_load_and_vertical_interpolate_two_&src_fmt \
5151176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                q1, q11, d0, d1, d20, d21, d22, d23
5161176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_load_mask mask_fmt, 2, d4
5171176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_load_dst dst_fmt, op, 2, d18, d19, q9
    /* pixel 0: q0 = d2 * (2^BITS - d30) + d3 * d30 */
5181176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS
5191176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmlsl.u16 q0, d2, d30
5201176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmlal.u16 q0, d3, d30
    /* pixel 1: q10 = d22 * (2^BITS - d31) + d23 * d31 */
5211176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS
5221176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmlsl.u16 q10, d22, d31
5231176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmlal.u16 q10, d23, d31
    /* drop the fractional bits; both results end up packed in q0 */
5241176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
5251176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
5261176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_duplicate_mask mask_fmt, 2, d4
    /* refresh the horizontal weights and step the x coordinates */
5271176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
5281176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vadd.u16  q12, q12, q13
    /* narrow 16 -> 8 bits per channel */
5291176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmovn.u16 d0, q0
5301176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_interleave_src_dst \
5311176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                mask_fmt, op, 2, d0, d1, q0, d18, d19, q9
5321176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_apply_mask_to_src \
5331176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                mask_fmt, 2, d0, d1, q0, d4, \
5341176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                q3, q8, q10, q11
5351176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_combine \
5361176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                op, 2, d0, d1, q0, d18, d19, q9, \
5371176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                q3, q8, q10, q11, d5
5381176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_deinterleave_dst mask_fmt, op, 2, d0, d1, q0
5391176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_store_&dst_fmt 2, q2, q3
5401176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
5411176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Interpolates, masks, combines and stores four pixels.
 *
 * The horizontal weights in q15 are consumed two pixels at a time: q15 is
 * recomputed from q12 and q12 is advanced by q13 once in the middle (for
 * pixels 2 and 3) and once more near the end (for whatever follows).
 * Two pld prefetches are issued at [TMP1, PF_OFFS], with STRIDE subtracted
 * from TMP1 in between.
 *
 * The mask is loaded into d22 only after q10 (the last reader of d22/d23 as
 * pixel data) has been computed.
 *
 * NOTE(review): the first continuation line of the load helper call ends
 * without a comma; the arguments are then separated by whitespace only.
 * The load, mask, interleave and store helpers are defined outside this
 * section.
 */
5421176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro bilinear_interpolate_four_pixels src_fmt, mask_fmt, dst_fmt, op
5431176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_load_and_vertical_interpolate_four_&src_fmt \
5441176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                q1, q11, d0, d1, d20, d21, d22, d23 \
5451176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                q3, q9,  d4, d5, d16, d17, d18, d19
    /* presumably TMP1 was left pointing into the source by the helper - TODO confirm */
5461176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    pld       [TMP1, PF_OFFS]
5471176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    sub       TMP1, TMP1, STRIDE
    /* pixel 0: q0 = d2 * (2^BITS - d30) + d3 * d30 */
5481176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS
5491176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmlsl.u16 q0, d2, d30
5501176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmlal.u16 q0, d3, d30
    /* pixel 1: q10 = d22 * (2^BITS - d31) + d23 * d31 */
5511176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS
5521176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmlsl.u16 q10, d22, d31
5531176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmlal.u16 q10, d23, d31
    /* weights for pixels 2 and 3 */
5541176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    /* pixel 2: q2 = d6 * (2^BITS - d30) + d7 * d30 */
5551176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshll.u16 q2, d6, #BILINEAR_INTERPOLATION_BITS
5561176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmlsl.u16 q2, d6, d30
5571176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmlal.u16 q2, d7, d30
    /* pixel 3 (finished below): q8 = d18 * (2^BITS - d31) + d19 * d31 */
5581176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshll.u16 q8, d18, #BILINEAR_INTERPOLATION_BITS
5591176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_load_mask mask_fmt, 4, d22
5601176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_load_dst dst_fmt, op, 4, d2, d3, q1
5611176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    pld       [TMP1, PF_OFFS]
5621176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmlsl.u16 q8, d18, d31
5631176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmlal.u16 q8, d19, d31
5641176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vadd.u16  q12, q12, q13
    /* drop the fractional bits: pixels 0,1 -> q0, pixels 2,3 -> q2 */
5651176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
5661176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
5671176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
5681176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshrn.u32 d5, q8, #(2 * BILINEAR_INTERPOLATION_BITS)
5691176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_duplicate_mask mask_fmt, 4, d22
    /* weights for the pixels that follow this block */
5701176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    /* narrow 16 -> 8 bits: four finished pixels in d0/d1 */
5711176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmovn.u16 d0, q0
5721176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vmovn.u16 d1, q2
5731176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vadd.u16  q12, q12, q13
5741176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_interleave_src_dst \
5751176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                mask_fmt, op, 4, d0, d1, q0, d2, d3, q1
5761176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_apply_mask_to_src \
5771176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                mask_fmt, 4, d0, d1, q0, d22, \
5781176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                q3, q8, q9, q10
5791176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_combine \
5801176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                op, 4, d0, d1, q0, d2, d3, q1, \
5811176bdada62cabc6ec4b0308a930e83b679d5d36John Reck                q3, q8, q9, q10, d23
5821176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_deinterleave_dst mask_fmt, op, 4, d0, d1, q0
5831176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_store_&dst_fmt 4, q2, q3
5841176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
5851176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Bit flags for the 'flags' argument of generate_bilinear_scanline_func:
 *   BILINEAR_FLAG_USE_MASK          - use the register map that has a MASK
 *                                     pointer in r1
 *   BILINEAR_FLAG_USE_ALL_NEON_REGS - save and restore d8-d15 around the
 *                                     function body
 */
5861176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.set BILINEAR_FLAG_USE_MASK,		1
5871176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.set BILINEAR_FLAG_USE_ALL_NEON_REGS,	2
5881176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
5891176bdada62cabc6ec4b0308a930e83b679d5d36John Reck/*
5901176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * Main template macro for generating NEON optimized bilinear scanline functions.
5911176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *
5921176bdada62cabc6ec4b0308a930e83b679d5d36John Reck * The bilinear scanline generator macro takes the following arguments:
5931176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *  fname			- name of the function to generate
5941176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *  src_fmt			- source color format (8888 or 0565)
5951176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *  dst_fmt			- destination color format (8888 or 0565)
5961176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *  src/dst_bpp_shift		- (1 << bpp_shift) is the size of src/dst pixel in bytes
5971176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *  process_last_pixel		- code block that interpolates one pixel and does not
5981176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *				  update the horizontal weight
5991176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *  process_two_pixels		- code block that interpolates two pixels and updates
6001176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *				  the horizontal weight
6011176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *  process_four_pixels		- code block that interpolates four pixels and updates
6021176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *				  the horizontal weight
6031176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *  process_pixblock_head	- head part of middle loop
6041176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *  process_pixblock_tail	- tail part of middle loop
6051176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *  process_pixblock_tail_head	- tail_head of middle loop
6061176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *  pixblock_size		- number of pixels processed in a single middle loop
6071176bdada62cabc6ec4b0308a930e83b679d5d36John Reck *  prefetch_distance		- prefetch in the source image by that many pixels ahead
6081176bdada62cabc6ec4b0308a930e83b679d5d36John Reck */
6091176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Expands to one complete scanline function named 'fname'.
 *
 * flags is a bit mask of BILINEAR_FLAG_USE_MASK and
 * BILINEAR_FLAG_USE_ALL_NEON_REGS. pixblock_size must be 8 or 4.
 *
 * NEON state set up here for the pixel macros:
 *   q12 - built from X: low half X, high half X + UX; stepped by q13
 *   q13 - UX in every lane, doubled before the two-pixel step
 *   q15 - q12 >> (16 - BILINEAR_INTERPOLATION_BITS), horizontal weights
 *   d28 - WT replicated to every byte
 *   d29 - WB replicated to every byte
 */
6101176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.macro generate_bilinear_scanline_func \
6111176bdada62cabc6ec4b0308a930e83b679d5d36John Reck	fname, \
6121176bdada62cabc6ec4b0308a930e83b679d5d36John Reck	src_fmt, dst_fmt, src_bpp_shift, dst_bpp_shift, \
6131176bdada62cabc6ec4b0308a930e83b679d5d36John Reck	bilinear_process_last_pixel, \
6141176bdada62cabc6ec4b0308a930e83b679d5d36John Reck	bilinear_process_two_pixels, \
6151176bdada62cabc6ec4b0308a930e83b679d5d36John Reck	bilinear_process_four_pixels, \
6161176bdada62cabc6ec4b0308a930e83b679d5d36John Reck	bilinear_process_pixblock_head, \
6171176bdada62cabc6ec4b0308a930e83b679d5d36John Reck	bilinear_process_pixblock_tail, \
6181176bdada62cabc6ec4b0308a930e83b679d5d36John Reck	bilinear_process_pixblock_tail_head, \
6191176bdada62cabc6ec4b0308a930e83b679d5d36John Reck	pixblock_size, \
6201176bdada62cabc6ec4b0308a930e83b679d5d36John Reck	prefetch_distance, \
6211176bdada62cabc6ec4b0308a930e83b679d5d36John Reck	flags
6221176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
6231176bdada62cabc6ec4b0308a930e83b679d5d36John Reckpixman_asm_function fname
/* reject any pixblock_size other than 8 or 4 at assembly time */
6241176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.if pixblock_size == 8
6251176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.elseif pixblock_size == 4
6261176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.else
6271176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .error unsupported pixblock size
6281176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endif
6291176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
6301176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0
    /*
     * Register map without a mask: OUT, TOP, BOTTOM and WT are in r0-r3;
     * WB, X, UX and WIDTH are loaded from the stack below.
     * TMP1/TMP2 share registers with WT/WB, and STRIDE shares r2 with BOTTOM.
     */
6311176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    OUT       .req    r0
6321176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    TOP       .req    r1
6331176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    BOTTOM    .req    r2
6341176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    WT        .req    r3
6351176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    WB        .req    r4
6361176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    X         .req    r5
6371176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    UX        .req    r6
6381176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    WIDTH     .req    ip
6391176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    TMP1      .req    r3
6401176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    TMP2      .req    r4
6411176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    PF_OFFS   .req    r7
6421176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    TMP3      .req    r8
6431176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    TMP4      .req    r9
6441176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    STRIDE    .req    r2
6451176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
    /* remember the incoming sp in ip before the push moves it */
6461176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    mov		ip, sp
6471176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    push	{r4, r5, r6, r7, r8, r9}
6481176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    mov		PF_OFFS, #prefetch_distance
    /* fetch the stack arguments; WIDTH is ip itself and is loaded last */
6491176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    ldmia	ip, {WB, X, UX, WIDTH}
6501176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.else
    /*
     * Register map with a mask: OUT, MASK, TOP and BOTTOM are in r0-r3;
     * WT, WB, X, UX and WIDTH are loaded from the stack below.
     * TMP1/TMP2 share registers with WT/WB, and STRIDE shares r3 with BOTTOM.
     */
6511176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    OUT       .req      r0
6521176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    MASK      .req      r1
6531176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    TOP       .req      r2
6541176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    BOTTOM    .req      r3
6551176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    WT        .req      r4
6561176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    WB        .req      r5
6571176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    X         .req      r6
6581176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    UX        .req      r7
6591176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    WIDTH     .req      ip
6601176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    TMP1      .req      r4
6611176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    TMP2      .req      r5
6621176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    PF_OFFS   .req      r8
6631176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    TMP3      .req      r9
6641176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    TMP4      .req      r10
6651176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    STRIDE    .req      r3
6661176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
6671176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .set prefetch_offset, prefetch_distance
6681176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
    /* remember the incoming sp in ip before the push moves it */
6691176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    mov       ip, sp
    /* NOTE(review): ip is pushed too, presumably to keep the pushed register count even - confirm */
6701176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    push      {r4, r5, r6, r7, r8, r9, r10, ip}
6711176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    mov       PF_OFFS, #prefetch_distance
    /* fetch the stack arguments; WIDTH is ip itself and is loaded last */
6721176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    ldmia     ip, {WT, WB, X, UX, WIDTH}
6731176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endif
6741176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
    /* PF_OFFS = prefetch_distance * UX; shifted down before the main loop */
6751176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    mul       PF_OFFS, PF_OFFS, UX
6761176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
6771176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
6781176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vpush     {d8-d15}
6791176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endif
6801176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
    /* STRIDE overwrites BOTTOM's register with BOTTOM - TOP */
6811176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    sub	      STRIDE, BOTTOM, TOP
6821176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .unreq    BOTTOM
6831176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
    /* nothing to do for WIDTH <= 0: go straight to the epilogue */
6841176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    cmp       WIDTH, #0
6851176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    ble       3f
6861176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
    /*
     * Copy the scalar arguments into NEON registers. After the last vadd the
     * high half of q12 (d25) holds X + UX while the low half still holds X.
     * WT and WB are not read again after this point, which is what frees
     * their registers for use as TMP1/TMP2.
     */
6871176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vdup.u16  q12, X
6881176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vdup.u16  q13, UX
6891176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vdup.u8   d28, WT
6901176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vdup.u8   d29, WB
6911176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vadd.u16  d25, d25, d26
6921176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
6931176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    /* ensure good destination alignment  */
    /* one leading pixel if OUT has the (1 << dst_bpp_shift) bit set */
6941176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    cmp       WIDTH, #1
6951176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    blt       0f
6961176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    tst       OUT, #(1 << dst_bpp_shift)
6971176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    beq       0f
6981176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
6991176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vadd.u16  q12, q12, q13
7001176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_process_last_pixel
7011176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    sub       WIDTH, WIDTH, #1
7021176bdada62cabc6ec4b0308a930e83b679d5d36John Reck0:
    /*
     * From here on pixels are handled in pairs: double the step in q13,
     * compute the weights for the next pair and advance q12 past it.
     */
7031176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vadd.u16  q13, q13, q13
7041176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
7051176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vadd.u16  q12, q12, q13
7061176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
    /* two leading pixels if OUT has the next alignment bit set */
7071176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    cmp       WIDTH, #2
7081176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    blt       0f
7091176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    tst       OUT, #(1 << (dst_bpp_shift + 1))
7101176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    beq       0f
7111176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_process_two_pixels
7121176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    sub       WIDTH, WIDTH, #2
7131176bdada62cabc6ec4b0308a930e83b679d5d36John Reck0:
7141176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.if pixblock_size == 8
    /* with 8-pixel blocks, also allow four leading pixels for alignment */
7151176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    cmp       WIDTH, #4
7161176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    blt       0f
7171176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    tst       OUT, #(1 << (dst_bpp_shift + 2))
7181176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    beq       0f
7191176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_process_four_pixels
7201176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    sub       WIDTH, WIDTH, #4
7211176bdada62cabc6ec4b0308a930e83b679d5d36John Reck0:
7221176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endif
    /*
     * Main loop, software pipelined as head / tail_head... / tail.
     * WIDTH is kept biased by -pixblock_size so that "subs + bge" tests
     * whether another full block is available.
     */
7231176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    subs      WIDTH, WIDTH, #pixblock_size
7241176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    blt       1f
7251176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    mov       PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
7261176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_process_pixblock_head
7271176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    subs      WIDTH, WIDTH, #pixblock_size
7281176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    blt       5f
7291176bdada62cabc6ec4b0308a930e83b679d5d36John Reck0:
7301176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_process_pixblock_tail_head
7311176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    subs      WIDTH, WIDTH, #pixblock_size
7321176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bge       0b
7331176bdada62cabc6ec4b0308a930e83b679d5d36John Reck5:
7341176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_process_pixblock_tail
7351176bdada62cabc6ec4b0308a930e83b679d5d36John Reck1:
    /*
     * WIDTH is negative here (remaining count minus pixblock_size). Because
     * pixblock_size is 4 or 8, the bits below it still equal the remaining
     * pixel count, so the tst instructions below can test them directly.
     */
7361176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.if pixblock_size == 8
7371176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    tst       WIDTH, #4
7381176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    beq       2f
7391176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_process_four_pixels
7401176bdada62cabc6ec4b0308a930e83b679d5d36John Reck2:
7411176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endif
7421176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    /* handle the remaining trailing pixels */
7431176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    tst       WIDTH, #2
7441176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    beq       2f
7451176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_process_two_pixels
7461176bdada62cabc6ec4b0308a930e83b679d5d36John Reck2:
7471176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    tst       WIDTH, #1
7481176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    beq       3f
7491176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bilinear_process_last_pixel
7501176bdada62cabc6ec4b0308a930e83b679d5d36John Reck3:
    /* epilogue: undo the vpush/push from the prologue in reverse order */
7511176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
7521176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    vpop      {d8-d15}
7531176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endif
7541176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
7551176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0
7561176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    pop       {r4, r5, r6, r7, r8, r9}
7571176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.else
7581176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    pop       {r4, r5, r6, r7, r8, r9, r10, ip}
7591176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endif
7601176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    bx        lr
7611176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
    /* release the register aliases so the next expansion can redefine them */
7621176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .unreq    OUT
7631176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .unreq    TOP
7641176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .unreq    WT
7651176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .unreq    WB
7661176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .unreq    X
7671176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .unreq    UX
7681176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .unreq    WIDTH
7691176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .unreq    TMP1
7701176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .unreq    TMP2
7711176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .unreq    PF_OFFS
7721176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .unreq    TMP3
7731176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .unreq    TMP4
7741176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .unreq    STRIDE
7751176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.if ((flags) & BILINEAR_FLAG_USE_MASK) != 0
7761176bdada62cabc6ec4b0308a930e83b679d5d36John Reck    .unreq    MASK
7771176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endif
7781176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
7791176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endfunc
7801176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
7811176bdada62cabc6ec4b0308a930e83b679d5d36John Reck.endm
7821176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * src_8888_8_8888
 *
 * Pixel-processing hooks for the combination named in the macro prefix.
 * Each hook forwards to one of the generic bilinear_interpolate_* macros
 * (defined elsewhere in this file) with the arguments 8888, 8, 8888, src.
 * NOTE(review): the arguments look like source format, mask format,
 * destination format and operator - confirm against the generic macros.
 */
.macro bilinear_src_8888_8_8888_process_last_pixel
    bilinear_interpolate_last_pixel 8888, 8, 8888, src
.endm

.macro bilinear_src_8888_8_8888_process_two_pixels
    bilinear_interpolate_two_pixels 8888, 8, 8888, src
.endm

.macro bilinear_src_8888_8_8888_process_four_pixels
    bilinear_interpolate_four_pixels 8888, 8, 8888, src
.endm

/* Head of a pixel block: expands to the four-pixel macro above. */
.macro bilinear_src_8888_8_8888_process_pixblock_head
    bilinear_src_8888_8_8888_process_four_pixels
.endm

/* Expands to nothing; of the head/tail pair only the head emits code. */
.macro bilinear_src_8888_8_8888_process_pixblock_tail
.endm

/* Tail of one block immediately followed by the head of the next. */
.macro bilinear_src_8888_8_8888_process_pixblock_tail_head
    bilinear_src_8888_8_8888_process_pixblock_tail
    bilinear_src_8888_8_8888_process_pixblock_head
.endm
8071176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * src_8888_8_0565
 *
 * Pixel-processing hooks for the combination named in the macro prefix.
 * Each hook forwards to one of the generic bilinear_interpolate_* macros
 * (defined elsewhere in this file) with the arguments 8888, 8, 0565, src.
 * NOTE(review): the arguments look like source format, mask format,
 * destination format and operator - confirm against the generic macros.
 */
.macro bilinear_src_8888_8_0565_process_last_pixel
    bilinear_interpolate_last_pixel 8888, 8, 0565, src
.endm

.macro bilinear_src_8888_8_0565_process_two_pixels
    bilinear_interpolate_two_pixels 8888, 8, 0565, src
.endm

.macro bilinear_src_8888_8_0565_process_four_pixels
    bilinear_interpolate_four_pixels 8888, 8, 0565, src
.endm

/* Head of a pixel block: expands to the four-pixel macro above. */
.macro bilinear_src_8888_8_0565_process_pixblock_head
    bilinear_src_8888_8_0565_process_four_pixels
.endm

/* Expands to nothing; of the head/tail pair only the head emits code. */
.macro bilinear_src_8888_8_0565_process_pixblock_tail
.endm

/* Tail of one block immediately followed by the head of the next. */
.macro bilinear_src_8888_8_0565_process_pixblock_tail_head
    bilinear_src_8888_8_0565_process_pixblock_tail
    bilinear_src_8888_8_0565_process_pixblock_head
.endm
8321176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * src_0565_8_x888
 *
 * Pixel-processing hooks for the combination named in the macro prefix.
 * Each hook forwards to one of the generic bilinear_interpolate_* macros
 * (defined elsewhere in this file) with the arguments 0565, 8, 8888, src.
 * Note that the third argument passed is 8888 although the macro prefix
 * says x888.
 * NOTE(review): the arguments look like source format, mask format,
 * destination format and operator - confirm against the generic macros.
 */
.macro bilinear_src_0565_8_x888_process_last_pixel
    bilinear_interpolate_last_pixel 0565, 8, 8888, src
.endm

.macro bilinear_src_0565_8_x888_process_two_pixels
    bilinear_interpolate_two_pixels 0565, 8, 8888, src
.endm

.macro bilinear_src_0565_8_x888_process_four_pixels
    bilinear_interpolate_four_pixels 0565, 8, 8888, src
.endm

/* Head of a pixel block: expands to the four-pixel macro above. */
.macro bilinear_src_0565_8_x888_process_pixblock_head
    bilinear_src_0565_8_x888_process_four_pixels
.endm

/* Expands to nothing; of the head/tail pair only the head emits code. */
.macro bilinear_src_0565_8_x888_process_pixblock_tail
.endm

/* Tail of one block immediately followed by the head of the next. */
.macro bilinear_src_0565_8_x888_process_pixblock_tail_head
    bilinear_src_0565_8_x888_process_pixblock_tail
    bilinear_src_0565_8_x888_process_pixblock_head
.endm
8571176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * src_0565_8_0565
 *
 * Pixel-processing hooks for the combination named in the macro prefix.
 * Each hook forwards to one of the generic bilinear_interpolate_* macros
 * (defined elsewhere in this file) with the arguments 0565, 8, 0565, src.
 * NOTE(review): the arguments look like source format, mask format,
 * destination format and operator - confirm against the generic macros.
 */
.macro bilinear_src_0565_8_0565_process_last_pixel
    bilinear_interpolate_last_pixel 0565, 8, 0565, src
.endm

.macro bilinear_src_0565_8_0565_process_two_pixels
    bilinear_interpolate_two_pixels 0565, 8, 0565, src
.endm

.macro bilinear_src_0565_8_0565_process_four_pixels
    bilinear_interpolate_four_pixels 0565, 8, 0565, src
.endm

/* Head of a pixel block: expands to the four-pixel macro above. */
.macro bilinear_src_0565_8_0565_process_pixblock_head
    bilinear_src_0565_8_0565_process_four_pixels
.endm

/* Expands to nothing; of the head/tail pair only the head emits code. */
.macro bilinear_src_0565_8_0565_process_pixblock_tail
.endm

/* Tail of one block immediately followed by the head of the next. */
.macro bilinear_src_0565_8_0565_process_pixblock_tail_head
    bilinear_src_0565_8_0565_process_pixblock_tail
    bilinear_src_0565_8_0565_process_pixblock_head
.endm
8821176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * over_8888_8888
 *
 * One-, two- and four-pixel hooks for the combination named in the macro
 * prefix.  Each forwards to a generic bilinear_interpolate_* macro (defined
 * elsewhere in this file) with the arguments 8888, x, 8888, over.
 * NOTE(review): the arguments look like source format, mask format ('x'
 * presumably meaning no mask), destination format and operator - confirm
 * against the generic macros.
 */
.macro bilinear_over_8888_8888_process_last_pixel
    bilinear_interpolate_last_pixel 8888, x, 8888, over
.endm

.macro bilinear_over_8888_8888_process_two_pixels
    bilinear_interpolate_two_pixels 8888, x, 8888, over
.endm

.macro bilinear_over_8888_8888_process_four_pixels
    bilinear_interpolate_four_pixels 8888, x, 8888, over
.endm
8951176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Head stage for a block of four pixels (over, 8888 source, no mask).
 *
 * For each of the four pixels the source address is formed as
 * TOP + (X >> 16) * 4, X is advanced by UX, and two 64-bit loads are
 * issued: one at that address and one STRIDE bytes after it.  Each pair
 * of loaded vectors is combined as  first * d28 + second * d29,  giving
 * q8, q9, q10 and q11 for pixels 0..3.
 *
 * For pixels 0 and 1 the two 16-bit halves of that result are then blended
 * as  lo * (2^BILINEAR_INTERPOLATION_BITS - w) + hi * w  with w taken from
 * d30 (pixel 0 -> q0) and d31 (pixel 1 -> q1).  Pixels 2 and 3 stay in
 * q10/q11 and are finished by the tail macro.
 *
 * On exit q15 has been recomputed from q12 and q12 advanced by q13.
 *
 * NOTE(review): d28/d29 presumably hold the weights of the two rows and
 * q12/q13 the fractional X positions and their step; they are initialised
 * outside this macro - confirm.  d30/d31 must already be valid on entry.
 */
.macro bilinear_over_8888_8888_process_pixblock_head
    /* addresses for pixels 0 and 1 */
    mov         TMP1, X, asr #16
    add         X, X, UX
    add         TMP1, TOP, TMP1, asl #2
    mov         TMP2, X, asr #16
    add         X, X, UX
    add         TMP2, TOP, TMP2, asl #2

    /* pixel 0: load both rows, blend them into q8; address for pixel 2 */
    vld1.32     {d22}, [TMP1], STRIDE
    vld1.32     {d23}, [TMP1]
    mov         TMP3, X, asr #16
    add         X, X, UX
    add         TMP3, TOP, TMP3, asl #2
    vmull.u8    q8, d22, d28
    vmlal.u8    q8, d23, d29

    /* pixel 1: load both rows, blend them into q9; address for pixel 3 */
    vld1.32     {d22}, [TMP2], STRIDE
    vld1.32     {d23}, [TMP2]
    mov         TMP4, X, asr #16
    add         X, X, UX
    add         TMP4, TOP, TMP4, asl #2
    vmull.u8    q9, d22, d28
    vmlal.u8    q9, d23, d29

    /* pixel 2: load both rows, blend them into q10 */
    vld1.32     {d22}, [TMP3], STRIDE
    vld1.32     {d23}, [TMP3]
    vmull.u8    q10, d22, d28
    vmlal.u8    q10, d23, d29

    /* pixel 0: q0 = d16 * (2^BITS - d30) + d17 * d30 */
    vshll.u16   q0, d16, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q0, d16, d30
    vmlal.u16   q0, d17, d30

    /* pixel 3: q8 is free again, reuse d16/d17 for the loads -> q11 */
    pld         [TMP4, PF_OFFS]
    vld1.32     {d16}, [TMP4], STRIDE
    vld1.32     {d17}, [TMP4]
    pld         [TMP4, PF_OFFS]
    vmull.u8    q11, d16, d28
    vmlal.u8    q11, d17, d29

    /* pixel 1: q1 = d18 * (2^BITS - d31) + d19 * d31 */
    vshll.u16   q1, d18, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q1, d18, d31
    vmlal.u16   q1, d19, d31
    /* refresh d30/d31 from q12 for pixels 2 and 3, then step q12 */
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vadd.u16    q12, q12, q13
.endm
9421176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Tail stage for a block of four pixels (over, 8888 source, no mask).
 *
 * Expects q0/q1 (pixels 0 and 1, already blended in both directions) and
 * q10/q11 (pixels 2 and 3, blended with d28/d29 only) as left by the
 * matching head macro, plus d30/d31 refreshed by that macro.
 *
 * Steps:
 *   1. finish pixels 2 and 3 the same way the head finished 0 and 1;
 *   2. narrow all four pixels to 8 bits per channel (d6, d7);
 *   3. load four destination pixels from OUT (128-bit aligned access);
 *   4. de-interleave source and destination into per-byte planes with two
 *      vuzp.8 passes each, so that the upper half of d7 holds byte 3 of
 *      every source pixel (used as alpha);
 *   5. destination = destination * (255 - alpha), divided by 255 via
 *      t = (x + 128) >> 8, result = (x + t + 128) >> 8;
 *   6. saturating add of the source, re-interleave, store and advance OUT.
 *
 * q15 is recomputed from q12 and q12 advanced by q13 for the next block.
 */
.macro bilinear_over_8888_8888_process_pixblock_tail
    /* pixel 2: q2 = d20 * (2^BITS - d30) + d21 * d30 */
    vshll.u16   q2, d20, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q2, d20, d30
    vmlal.u16   q2, d21, d30
    /* pixel 3: q3 = d22 * (2^BITS - d31) + d23 * d31 */
    vshll.u16   q3, d22, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q3, d22, d31
    vmlal.u16   q3, d23, d31
    /* drop the 2 * BITS weight bits: 32-bit sums -> 16-bit lanes */
    vshrn.u32   d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
    /* four destination pixels */
    vld1.32     {d2, d3}, [OUT, :128]
    pld         [OUT, #(prefetch_offset * 4)]
    vshrn.u32   d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS)
    /* 16-bit -> 8-bit: d6 = pixels 0,1   d7 = pixels 2,3 */
    vmovn.u16   d6, q0
    vmovn.u16   d7, q2
    /* packed pixels -> byte planes (source in d6/d7, destination in d2/d3) */
    vuzp.8      d6, d7
    vuzp.8      d2, d3
    vuzp.8      d6, d7
    vuzp.8      d2, d3
    /* d4 = 255 - alpha, replicated to both halves */
    vdup.32     d4, d7[1]
    vmvn.8      d4, d4
    /* destination * (255 - alpha), then rounded division by 255 */
    vmull.u8    q11, d2, d4
    vmull.u8    q2, d3, d4
    vrshr.u16   q1, q11, #8
    vrshr.u16   q10, q2, #8
    vraddhn.u16 d2, q1, q11
    vraddhn.u16 d3, q10, q2
    /* add the source with saturation */
    vqadd.u8    q3, q1, q3
    /* byte planes -> packed pixels */
    vuzp.8      d6, d7
    vuzp.8      d6, d7
    vadd.u16    q12, q12, q13
    vst1.32     {d6, d7}, [OUT, :128]!
.endm
9771176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Combined tail (previous block) + head (next block), interleaved
 * instruction by instruction.
 *
 * Layout convention used below:
 *   - deeply indented instructions are the tail sequence of the block that
 *     is being finished;
 *   - instructions at the normal indent are the head sequence of the block
 *     that is being started.
 *
 * Differences from the stand-alone macros that are visible here:
 *   - the first pair of head loads goes through d20/d21 (free once the tail
 *     has consumed q10) instead of d22/d23;
 *   - the tail uses q8 rather than q10 as scratch for the second
 *     rounded-division step, because q10 already holds data of the next
 *     block at that point;
 *   - the destination prefetch uses the PF_OFFS register rather than an
 *     immediate offset.
 *
 * The relative order of the two streams matters for register reuse; do not
 * reorder without re-checking every register hand-over.
 */
.macro bilinear_over_8888_8888_process_pixblock_tail_head
                                            vshll.u16   q2, d20, #BILINEAR_INTERPOLATION_BITS
    mov         TMP1, X, asr #16
    add         X, X, UX
    add         TMP1, TOP, TMP1, asl #2
                                            vmlsl.u16   q2, d20, d30
    mov         TMP2, X, asr #16
    add         X, X, UX
    add         TMP2, TOP, TMP2, asl #2
                                            vmlal.u16   q2, d21, d30
                                            vshll.u16   q3, d22, #BILINEAR_INTERPOLATION_BITS
    /* q10 has been consumed above: d20/d21 may be overwritten */
    vld1.32     {d20}, [TMP1], STRIDE
                                            vmlsl.u16   q3, d22, d31
                                            vmlal.u16   q3, d23, d31
    vld1.32     {d21}, [TMP1]
    vmull.u8    q8, d20, d28
    vmlal.u8    q8, d21, d29
                                            vshrn.u32   d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
                                            vshrn.u32   d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
                                            vld1.32     {d2, d3}, [OUT, :128]
                                            pld         [OUT, PF_OFFS]
                                            vshrn.u32   d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
                                            vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    /* q11 has been consumed above: d22/d23 may be overwritten */
    vld1.32     {d22}, [TMP2], STRIDE
                                            vshrn.u32   d5, q3, #(2 * BILINEAR_INTERPOLATION_BITS)
                                            vmovn.u16   d6, q0
    vld1.32     {d23}, [TMP2]
    vmull.u8    q9, d22, d28
    mov         TMP3, X, asr #16
    add         X, X, UX
    add         TMP3, TOP, TMP3, asl #2
    mov         TMP4, X, asr #16
    add         X, X, UX
    add         TMP4, TOP, TMP4, asl #2
    vmlal.u8    q9, d23, d29
                                            vmovn.u16   d7, q2
    vld1.32     {d22}, [TMP3], STRIDE
                                            vuzp.8      d6, d7
                                            vuzp.8      d2, d3
                                            vuzp.8      d6, d7
                                            vuzp.8      d2, d3
                                            vdup.32     d4, d7[1]
    vld1.32     {d23}, [TMP3]
                                            vmvn.8      d4, d4
    vmull.u8    q10, d22, d28
    vmlal.u8    q10, d23, d29
                                            vmull.u8    q11, d2, d4
                                            vmull.u8    q2, d3, d4
    vshll.u16   q0, d16, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q0, d16, d30
                                            vrshr.u16   q1, q11, #8
    vmlal.u16   q0, d17, d30
    /* q8 of the new block is fully consumed by q0: free as tail scratch */
                                            vrshr.u16   q8, q2, #8
                                            vraddhn.u16 d2, q1, q11
                                            vraddhn.u16 d3, q8, q2
    pld         [TMP4, PF_OFFS]
    vld1.32     {d16}, [TMP4], STRIDE
                                            vqadd.u8    q3, q1, q3
    vld1.32     {d17}, [TMP4]
    pld         [TMP4, PF_OFFS]
    vmull.u8    q11, d16, d28
    vmlal.u8    q11, d17, d29
                                            vuzp.8      d6, d7
    vshll.u16   q1, d18, #BILINEAR_INTERPOLATION_BITS
                                            vuzp.8      d6, d7
    vmlsl.u16   q1, d18, d31
                                            vadd.u16    q12, q12, q13
    vmlal.u16   q1, d19, d31
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vadd.u16    q12, q12, q13
                                            vst1.32     {d6, d7}, [OUT, :128]!
.endm
10501176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * over_8888_8_8888
 *
 * One-, two- and four-pixel hooks for the combination named in the macro
 * prefix.  Each forwards to a generic bilinear_interpolate_* macro (defined
 * elsewhere in this file) with the arguments 8888, 8, 8888, over.
 * NOTE(review): the arguments look like source format, mask format,
 * destination format and operator - confirm against the generic macros.
 */
.macro bilinear_over_8888_8_8888_process_last_pixel
    bilinear_interpolate_last_pixel 8888, 8, 8888, over
.endm

.macro bilinear_over_8888_8_8888_process_two_pixels
    bilinear_interpolate_two_pixels 8888, 8, 8888, over
.endm

.macro bilinear_over_8888_8_8888_process_four_pixels
    bilinear_interpolate_four_pixels 8888, 8, 8888, over
.endm
10631176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Head stage for a block of four pixels (over, 8888 source, with mask).
 *
 * Source addresses are formed as TOP + (X >> 16) * 4 with X advanced by UX
 * after each pixel; every pixel takes one 64-bit load at that address and
 * one STRIDE bytes after it, combined as  first * d28 + second * d29.
 *
 * State handed to the tail macro:
 *   d16      pixels 0 and 1, fully blended and narrowed to 8 bits/channel
 *   q3       pixel 2, blended with d28/d29 only
 *   q1       pixel 3, blended with d28/d29 only
 *   d22[0]   four mask bytes loaded from MASK (MASK is advanced by 4)
 *   q15      refreshed from q12 for pixels 2 and 3; q12 advanced by q13
 *
 * NOTE(review): d28/d29 presumably hold the weights of the two rows and
 * q12/q13 the fractional X positions and their step; they are initialised
 * outside this macro - confirm.  d30/d31 must already be valid on entry.
 */
.macro bilinear_over_8888_8_8888_process_pixblock_head
    /* address generation interleaved with the loads for pixels 0 and 1 */
    mov         TMP1, X, asr #16
    add         X, X, UX
    add         TMP1, TOP, TMP1, asl #2
    vld1.32     {d0}, [TMP1], STRIDE
    mov         TMP2, X, asr #16
    add         X, X, UX
    add         TMP2, TOP, TMP2, asl #2
    vld1.32     {d1}, [TMP1]
    mov         TMP3, X, asr #16
    add         X, X, UX
    add         TMP3, TOP, TMP3, asl #2
    vld1.32     {d2}, [TMP2], STRIDE
    mov         TMP4, X, asr #16
    add         X, X, UX
    add         TMP4, TOP, TMP4, asl #2
    vld1.32     {d3}, [TMP2]
    /* pixels 0 and 1: q2/q3 = first * d28 + second * d29 */
    vmull.u8    q2, d0, d28
    vmull.u8    q3, d2, d28
    vmlal.u8    q2, d1, d29
    vmlal.u8    q3, d3, d29
    /* q0 = d4 * (2^BITS - d30) + d5 * d30,  q1 likewise from q3 with d31 */
    vshll.u16   q0, d4, #BILINEAR_INTERPOLATION_BITS
    vshll.u16   q1, d6, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q0, d4, d30
    vmlsl.u16   q1, d6, d31
    vmlal.u16   q0, d5, d30
    vmlal.u16   q1, d7, d31
    /* drop the 2 * BITS weight bits: 32-bit sums -> 16-bit lanes */
    vshrn.u32   d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
    /* pixels 2 and 3: loads, then the d28/d29 blend into q3 and q1 */
    vld1.32     {d2}, [TMP3], STRIDE
    vld1.32     {d3}, [TMP3]
    pld         [TMP4, PF_OFFS]
    vld1.32     {d4}, [TMP4], STRIDE
    vld1.32     {d5}, [TMP4]
    pld         [TMP4, PF_OFFS]
    vmull.u8    q3, d2, d28
    vmlal.u8    q3, d3, d29
    vmull.u8    q1, d4, d28
    vmlal.u8    q1, d5, d29
    /* refresh d30/d31 from q12 for pixels 2 and 3 */
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    /* four mask bytes into lane 0 of d22, MASK += 4 */
    vld1.32     {d22[0]}, [MASK]!
    pld         [MASK, #prefetch_offset]
    vadd.u16    q12, q12, q13
    /* pixels 0 and 1: 16-bit -> 8-bit */
    vmovn.u16   d16, q0
.endm
11091176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Tail stage for a block of four pixels (over, 8888 source, with mask).
 *
 * Expects the state left by the matching head macro: d16 (pixels 0 and 1
 * at 8 bits per channel), q3 and q1 (pixels 2 and 3 blended with d28/d29
 * only), four mask bytes in d22[0] and d30/d31 refreshed for pixels 2, 3.
 *
 * Steps:
 *   1. finish pixels 2 and 3:  lo * (2^BITS - w) + hi * w  with w from
 *      d30 / d31, then narrow to 8 bits per channel (d17);
 *   2. load four destination pixels from OUT (128-bit aligned access);
 *   3. de-interleave source (d16/d17) and destination (d18/d19) into
 *      per-byte planes with two vuzp.8 passes each;
 *   4. multiply every source plane by the mask bytes and divide by 255
 *      (x += (x + 128) >> 8; result = (x + 128) >> 8);
 *   5. take the upper half of d17 (byte 3 of every masked source pixel) as
 *      alpha, scale the destination by (255 - alpha) / 255 using
 *      t = (x + 128) >> 8, result = (x + t + 128) >> 8;
 *   6. saturating add, re-interleave, store and advance OUT.
 *
 * q15 is recomputed from q12 and q12 advanced by q13 for the next block.
 */
.macro bilinear_over_8888_8_8888_process_pixblock_tail
    /* pixel 2 -> q9 (weights d30), pixel 3 -> q10 (weights d31) */
    vshll.u16   q9, d6, #BILINEAR_INTERPOLATION_BITS
    vshll.u16   q10, d2, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16   q9, d6, d30
    vmlsl.u16   q10, d2, d31
    vmlal.u16   q9, d7, d30
    vmlal.u16   q10, d3, d31
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vadd.u16    q12, q12, q13
    /* replicate the four mask bytes to both halves of d22 */
    vdup.32     d22, d22[0]
    /* 32-bit sums -> 16-bit lanes -> 8-bit: d17 = pixels 2,3 */
    vshrn.u32   d18, q9, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d19, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
    vmovn.u16   d17, q9
    /* four destination pixels */
    vld1.32     {d18, d19}, [OUT, :128]
    pld         [OUT, PF_OFFS]
    /* packed pixels -> byte planes */
    vuzp.8      d16, d17
    vuzp.8      d18, d19
    vuzp.8      d16, d17
    vuzp.8      d18, d19
    /* source * mask, rounded division by 255 */
    vmull.u8    q10, d16, d22
    vmull.u8    q11, d17, d22
    vrsra.u16   q10, q10, #8
    vrsra.u16   q11, q11, #8
    vrshrn.u16  d16, q10, #8
    vrshrn.u16  d17, q11, #8
    /* d22 = 255 - (masked source alpha), replicated to both halves */
    vdup.32     d22, d17[1]
    vmvn.8      d22, d22
    /* destination * (255 - alpha), rounded division by 255 */
    vmull.u8    q10, d18, d22
    vmull.u8    q11, d19, d22
    vrshr.u16   q9, q10, #8
    vrshr.u16   q0, q11, #8
    vraddhn.u16 d18, q9, q10
    vraddhn.u16 d19, q0, q11
    /* add the masked source with saturation */
    vqadd.u8    q9, q8, q9
    /* byte planes -> packed pixels */
    vuzp.8      d18, d19
    vuzp.8      d18, d19
    vst1.32     {d18, d19}, [OUT, :128]!
.endm
11481176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Combined "tail + head" step for the masked OVER variant (8888 source,
 * 8-bit mask, 8888 destination). One expansion finishes and stores the four
 * pixels whose interpolation was started earlier and, interleaved with that,
 * starts the interpolation of the following four pixels.
 *
 * Layout convention used below:
 *   - instructions at the normal 4-space indent belong to the NEXT block
 *     (address generation, loads, interpolation);
 *   - the deeply indented instructions belong to the CURRENT block
 *     (finish interpolation, apply mask, blend with destination, store).
 * The two streams are scheduled by hand and hand registers over to each
 * other (d2/d3, q3, q9-q11, q15, d16, d22), so instruction order is
 * significant. Do not reorder without re-checking every such hand-over.
 *
 * NEXT-block stream, per output pixel:
 *   TMPn = TOP + (X >> 16) * 4, then X += UX
 *       X is treated as a fixed-point coordinate with 16 fractional bits and
 *       the source as 4 bytes per pixel.
 *   vld1.32 {dA}, [TMPn], STRIDE ; vld1.32 {dB}, [TMPn]
 *       two adjacent pixels are fetched at TMPn and again at TMPn + STRIDE
 *       (presumably the top and bottom source rows - confirm in the caller).
 *   vmull.u8 / vmlal.u8 with d28, d29
 *       dA * d28 + dB * d29: blend of the two fetched rows.
 *   vshll.u16 / vmlsl.u16 / vmlal.u16 with d30 or d31
 *       left * (2^BITS - w) + right * w, where BITS is
 *       BILINEAR_INTERPOLATION_BITS and w comes from q15; the result is
 *       narrowed by vshrn #(2 * BITS) and then vmovn.
 *   q15 = q12 >> (16 - BITS), q12 += q13
 *       q12 looks like the running fractional X position and q13 its
 *       per-step increment (NOTE(review): set up outside this macro).
 *
 * CURRENT-block stream:
 *   - completes the horizontal blend for the two pixels left pending in
 *     q3 / q1 and packs all four source pixels into q8 (d16, d17);
 *   - loads four destination pixels from OUT into q9;
 *   - two consecutive vuzp.8 on a register pair transpose the 4x4 byte matrix
 *     (pixel-major <-> channel-major); the same pair applied again restores
 *     the packed pixel layout before the store;
 *   - source * mask (d22 = four mask bytes replicated), divided by 255 with
 *     rounding via vrsra #8 + vrshrn #8;
 *   - d22 = ~(byte 3 of each masked source pixel), i.e. inverted alpha when
 *     the pixels are a8r8g8b8 in memory (presumably - confirm);
 *   - destination * d22, divided by 255 via vrshr #8 + vraddhn;
 *   - saturating add of masked source and scaled destination, then a
 *     128-bit-aligned store of four pixels with OUT post-incremented.
 *
 * q15 is used as scratch in the destination scaling here, so the weights are
 * recomputed from q12 immediately afterwards.
 */
.macro bilinear_over_8888_8_8888_process_pixblock_tail_head
                                            vshll.u16   q9, d6, #BILINEAR_INTERPOLATION_BITS
    mov         TMP1, X, asr #16
    add         X, X, UX
    add         TMP1, TOP, TMP1, asl #2
                                            vshll.u16   q10, d2, #BILINEAR_INTERPOLATION_BITS
    vld1.32     {d0}, [TMP1], STRIDE
    mov         TMP2, X, asr #16
    add         X, X, UX
    add         TMP2, TOP, TMP2, asl #2
                                            vmlsl.u16   q9, d6, d30
                                            vmlsl.u16   q10, d2, d31
    vld1.32     {d1}, [TMP1]
    mov         TMP3, X, asr #16
    add         X, X, UX
    add         TMP3, TOP, TMP3, asl #2
                                            vmlal.u16   q9, d7, d30
                                            vmlal.u16   q10, d3, d31
    /* d2/d3 were last read by the current block just above; now reusable. */
    vld1.32     {d2}, [TMP2], STRIDE
    mov         TMP4, X, asr #16
    add         X, X, UX
    add         TMP4, TOP, TMP4, asl #2
                                            vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
                                            vadd.u16    q12, q12, q13
    vld1.32     {d3}, [TMP2]
                                            vdup.32     d22, d22[0]
                                            vshrn.u32   d18, q9, #(2 * BILINEAR_INTERPOLATION_BITS)
                                            vshrn.u32   d19, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
    vmull.u8    q2, d0, d28
    vmull.u8    q3, d2, d28
                                            vmovn.u16   d17, q9
                                            vld1.32     {d18, d19}, [OUT, :128]
                                            pld         [OUT, #(prefetch_offset * 4)]
    vmlal.u8    q2, d1, d29
    vmlal.u8    q3, d3, d29
                                            vuzp.8      d16, d17
                                            vuzp.8      d18, d19
    vshll.u16   q0, d4, #BILINEAR_INTERPOLATION_BITS
    vshll.u16   q1, d6, #BILINEAR_INTERPOLATION_BITS
                                            vuzp.8      d16, d17
                                            vuzp.8      d18, d19
    vmlsl.u16   q0, d4, d30
    vmlsl.u16   q1, d6, d31
                                            vmull.u8    q10, d16, d22
                                            vmull.u8    q11, d17, d22
    vmlal.u16   q0, d5, d30
    vmlal.u16   q1, d7, d31
                                            vrsra.u16   q10, q10, #8
                                            vrsra.u16   q11, q11, #8
    vshrn.u32   d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32   d1, q1, #(2 * BILINEAR_INTERPOLATION_BITS)
                                            vrshrn.u16  d16, q10, #8
                                            vrshrn.u16  d17, q11, #8
    vld1.32     {d2}, [TMP3], STRIDE
                                            vdup.32     d22, d17[1]
    vld1.32     {d3}, [TMP3]
                                            vmvn.8      d22, d22
    /* Prefetch is issued once before and once after TMP4 advances by STRIDE. */
    pld         [TMP4, PF_OFFS]
    vld1.32     {d4}, [TMP4], STRIDE
                                            vmull.u8    q10, d18, d22
                                            vmull.u8    q11, d19, d22
    vld1.32     {d5}, [TMP4]
    pld         [TMP4, PF_OFFS]
    vmull.u8    q3, d2, d28
                                            vrshr.u16   q9, q10, #8
                                            vrshr.u16   q15, q11, #8
    vmlal.u8    q3, d3, d29
    vmull.u8    q1, d4, d28
                                            vraddhn.u16 d18, q9, q10
                                            vraddhn.u16 d19, q15, q11
    vmlal.u8    q1, d5, d29
    /* q15 was clobbered as scratch above; rebuild the weights from q12. */
    vshr.u16    q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
                                            vqadd.u8    q9, q8, q9
    /* Four mask bytes for the next block; MASK is post-incremented. */
    vld1.32     {d22[0]}, [MASK]!
                                            vuzp.8      d18, d19
    vadd.u16    q12, q12, q13
                                            vuzp.8      d18, d19
    vmovn.u16   d16, q0
                                            vst1.32     {d18, d19}, [OUT, :128]!
.endm
12291176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/* add_8888_8888: bilinear ADD, 8888 source -> 8888 destination, no mask */
/*
 * Single-pixel step of the unmasked ADD variant. Expands the shared
 * bilinear_interpolate_last_pixel template with the arguments
 * 8888, x, 8888, add (by the macro's name: source format, mask, destination
 * format, operator; the masked variants pass 8 where this one passes x).
 */
.macro bilinear_add_8888_8888_process_last_pixel
    bilinear_interpolate_last_pixel 8888, x, 8888, add
.endm
12341176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Two-pixel step of the unmasked ADD variant. Expands the shared
 * bilinear_interpolate_two_pixels template with 8888, x, 8888, add.
 */
.macro bilinear_add_8888_8888_process_two_pixels
    bilinear_interpolate_two_pixels 8888, x, 8888, add
.endm
12381176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Four-pixel step of the unmasked ADD variant. Expands the shared
 * bilinear_interpolate_four_pixels template with 8888, x, 8888, add.
 * Also reused as the pixblock head of this variant.
 */
.macro bilinear_add_8888_8888_process_four_pixels
    bilinear_interpolate_four_pixels 8888, x, 8888, add
.endm
12421176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Pixblock head of the unmasked ADD variant. There is no hand-pipelined
 * version for this operator: the head simply performs the complete
 * four-pixel step, and the matching tail is empty.
 */
.macro bilinear_add_8888_8888_process_pixblock_head
    bilinear_add_8888_8888_process_four_pixels
.endm
12461176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Pixblock tail of the unmasked ADD variant. Intentionally empty: the head
 * macro already processes the whole block, so nothing is left to finish.
 */
.macro bilinear_add_8888_8888_process_pixblock_tail
.endm
12491176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Combined tail + head of the unmasked ADD variant. Not interleaved: it
 * expands the (empty) tail followed by the head, which amounts to one full
 * four-pixel step.
 */
.macro bilinear_add_8888_8888_process_pixblock_tail_head
    bilinear_add_8888_8888_process_pixblock_tail
    bilinear_add_8888_8888_process_pixblock_head
.endm
12541176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/* add_8888_8_8888: bilinear ADD, 8888 source, 8-bit mask -> 8888 destination */
/*
 * Single-pixel step of the masked ADD variant. Expands the shared
 * bilinear_interpolate_last_pixel template with the arguments
 * 8888, 8, 8888, add (by the macro's name: source format, mask format,
 * destination format, operator).
 */
.macro bilinear_add_8888_8_8888_process_last_pixel
    bilinear_interpolate_last_pixel 8888, 8, 8888, add
.endm
12591176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Two-pixel step of the masked ADD variant. Expands the shared
 * bilinear_interpolate_two_pixels template with 8888, 8, 8888, add.
 */
.macro bilinear_add_8888_8_8888_process_two_pixels
    bilinear_interpolate_two_pixels 8888, 8, 8888, add
.endm
12631176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Four-pixel step of the masked ADD variant. Expands the shared
 * bilinear_interpolate_four_pixels template with 8888, 8, 8888, add.
 * Also reused as the pixblock head of this variant.
 */
.macro bilinear_add_8888_8_8888_process_four_pixels
    bilinear_interpolate_four_pixels 8888, 8, 8888, add
.endm
12671176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Pixblock head of the masked ADD variant. There is no hand-pipelined
 * version for this operator: the head simply performs the complete
 * four-pixel step, and the matching tail is empty.
 */
.macro bilinear_add_8888_8_8888_process_pixblock_head
    bilinear_add_8888_8_8888_process_four_pixels
.endm
12711176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Pixblock tail of the masked ADD variant. Intentionally empty: the head
 * macro already processes the whole block, so nothing is left to finish.
 */
.macro bilinear_add_8888_8_8888_process_pixblock_tail
.endm
12741176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Combined tail + head of the masked ADD variant. Not interleaved: it
 * expands the (empty) tail followed by the head, which amounts to one full
 * four-pixel step.
 */
.macro bilinear_add_8888_8_8888_process_pixblock_tail_head
    bilinear_add_8888_8_8888_process_pixblock_tail
    bilinear_add_8888_8_8888_process_pixblock_head
.endm
12791176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
12801176bdada62cabc6ec4b0308a930e83b679d5d36John Reck
/*
 * Bilinear scanline functions
 *
 * Each invocation below instantiates one exported scanline routine. The
 * arguments, in order, as they can be read off the call sites:
 *   1.     name of the function to generate
 *   2-3.   source and destination pixel formats (8888 or 0565)
 *   4-5.   one number per format; 2 accompanies 8888 and 1 accompanies 0565
 *          in every call, so presumably log2(bytes per pixel)
 *   6-11.  the per-variant macros: last pixel, two pixels, four pixels,
 *          pixblock head, pixblock tail, pixblock tail_head
 *   12-13. 4 and 28 in every call (NOTE(review): presumably pixels per
 *          pixblock and prefetch distance - confirm against the definition
 *          of generate_bilinear_scanline_func)
 *   14.    flags: BILINEAR_FLAG_USE_MASK for the variants with an 8-bit
 *          mask (names containing "_8_"), 0 otherwise
 */

/* SRC, 8888 source, 8-bit mask, 8888 destination */
generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \
    8888, 8888, 2, 2, \
    bilinear_src_8888_8_8888_process_last_pixel, \
    bilinear_src_8888_8_8888_process_two_pixels, \
    bilinear_src_8888_8_8888_process_four_pixels, \
    bilinear_src_8888_8_8888_process_pixblock_head, \
    bilinear_src_8888_8_8888_process_pixblock_tail, \
    bilinear_src_8888_8_8888_process_pixblock_tail_head, \
    4, 28, BILINEAR_FLAG_USE_MASK

/* SRC, 8888 source, 8-bit mask, 0565 destination */
generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \
    8888, 0565, 2, 1, \
    bilinear_src_8888_8_0565_process_last_pixel, \
    bilinear_src_8888_8_0565_process_two_pixels, \
    bilinear_src_8888_8_0565_process_four_pixels, \
    bilinear_src_8888_8_0565_process_pixblock_head, \
    bilinear_src_8888_8_0565_process_pixblock_tail, \
    bilinear_src_8888_8_0565_process_pixblock_tail_head, \
    4, 28, BILINEAR_FLAG_USE_MASK

/* SRC, 0565 source, 8-bit mask, x888 destination (passed as format 8888) */
generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \
    0565, 8888, 1, 2, \
    bilinear_src_0565_8_x888_process_last_pixel, \
    bilinear_src_0565_8_x888_process_two_pixels, \
    bilinear_src_0565_8_x888_process_four_pixels, \
    bilinear_src_0565_8_x888_process_pixblock_head, \
    bilinear_src_0565_8_x888_process_pixblock_tail, \
    bilinear_src_0565_8_x888_process_pixblock_tail_head, \
    4, 28, BILINEAR_FLAG_USE_MASK

/* SRC, 0565 source, 8-bit mask, 0565 destination */
generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \
    0565, 0565, 1, 1, \
    bilinear_src_0565_8_0565_process_last_pixel, \
    bilinear_src_0565_8_0565_process_two_pixels, \
    bilinear_src_0565_8_0565_process_four_pixels, \
    bilinear_src_0565_8_0565_process_pixblock_head, \
    bilinear_src_0565_8_0565_process_pixblock_tail, \
    bilinear_src_0565_8_0565_process_pixblock_tail_head, \
    4, 28, BILINEAR_FLAG_USE_MASK

/* OVER, 8888 source, no mask, 8888 destination */
generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \
    8888, 8888, 2, 2, \
    bilinear_over_8888_8888_process_last_pixel, \
    bilinear_over_8888_8888_process_two_pixels, \
    bilinear_over_8888_8888_process_four_pixels, \
    bilinear_over_8888_8888_process_pixblock_head, \
    bilinear_over_8888_8888_process_pixblock_tail, \
    bilinear_over_8888_8888_process_pixblock_tail_head, \
    4, 28, 0

/* OVER, 8888 source, 8-bit mask, 8888 destination */
generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \
    8888, 8888, 2, 2, \
    bilinear_over_8888_8_8888_process_last_pixel, \
    bilinear_over_8888_8_8888_process_two_pixels, \
    bilinear_over_8888_8_8888_process_four_pixels, \
    bilinear_over_8888_8_8888_process_pixblock_head, \
    bilinear_over_8888_8_8888_process_pixblock_tail, \
    bilinear_over_8888_8_8888_process_pixblock_tail_head, \
    4, 28, BILINEAR_FLAG_USE_MASK

/* ADD, 8888 source, no mask, 8888 destination */
generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \
    8888, 8888, 2, 2, \
    bilinear_add_8888_8888_process_last_pixel, \
    bilinear_add_8888_8888_process_two_pixels, \
    bilinear_add_8888_8888_process_four_pixels, \
    bilinear_add_8888_8888_process_pixblock_head, \
    bilinear_add_8888_8888_process_pixblock_tail, \
    bilinear_add_8888_8888_process_pixblock_tail_head, \
    4, 28, 0

/* ADD, 8888 source, 8-bit mask, 8888 destination */
generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \
    8888, 8888, 2, 2, \
    bilinear_add_8888_8_8888_process_last_pixel, \
    bilinear_add_8888_8_8888_process_two_pixels, \
    bilinear_add_8888_8_8888_process_four_pixels, \
    bilinear_add_8888_8_8888_process_pixblock_head, \
    bilinear_add_8888_8_8888_process_pixblock_tail, \
    bilinear_add_8888_8_8888_process_pixblock_tail_head, \
    4, 28, BILINEAR_FLAG_USE_MASK
1369