/*
 * Copyright © 2013 The Android Open Source Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/*
 * Copyright © 2009 Nokia Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Author:  Siarhei Siamashka (siarhei.siamashka@nokia.com)
 */

/*
 * Emit an empty .note.GNU-stack section on Linux/ELF so that this object
 * does not request an executable stack from the linker.
 */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

/*
 * Assembler setup: ARM (not Thumb) code using NEON instructions.  The
 * macros in this file are written in GAS "altmacro" syntax (bare parameter
 * names and '&' concatenation), hence the .altmacro directive.
 */
    .text
    .fpu neon
    .arch armv7a
    .object_arch armv4
    .eabi_attribute 10, 0 /* suppress Tag_FP_arch */
    .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */
    .arm
    .altmacro
    .p2align 2

#include "pixman-private.h"
#include "pixman-arm-neon-asm.h"

/*
 * Assembly-time settings.  None of the three symbols below is referenced
 * by the code in this file.
 * NOTE(review): presumably they are consumed by pixman-arm-neon-asm.h -
 * confirm before changing them.
 */
.set RESPECT_STRICT_ALIGNMENT, 1
.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED
.set PREFETCH_DISTANCE_SIMPLE, 64

/*
 * Bit flags for the 'flags' argument of generate_bilinear_scanline_func.
 * BILINEAR_FLAG_UNROLL_4 is zero, i.e. it is simply the absence of
 * BILINEAR_FLAG_UNROLL_8.
 */
.set BILINEAR_FLAG_UNROLL_4,          0
.set BILINEAR_FLAG_UNROLL_8,          1
.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2

/*
 * Supplementary macro for setting function attributes.
 *
 * Opens a .func scope for 'fname', exports the symbol as global (with
 * hidden visibility and %function type when building for ELF) and then
 * defines the label itself.  The user of this macro is responsible for
 * issuing the matching .endfunc.
 */
.macro pixman_asm_function fname
    .func fname
    .global fname
#ifdef __ELF__
    .hidden fname
    .type fname, %function
#endif
fname:
.endm

/*
 * Load the source data for one output pixel and step X to the next pixel.
 *
 *   reg1 <- data at TOP + (X >> 16) * 4
 *   reg2 <- data STRIDE bytes after that address (the function body sets
 *           STRIDE = BOTTOM - TOP, so this is the same column of the
 *           second scanline)
 *   X    <- X + UX
 *
 * Every caller in this file passes D registers, so each load fetches two
 * adjacent 32-bit elements (64 bits).
 *
 * Uses the register aliases TOP, X, UX, STRIDE and TMP1 that are set up by
 * generate_bilinear_scanline_func.  TMP1 is clobbered and is left holding
 * the address used for the second load.  The 'tmp' argument is accepted
 * but not used.
 */
.macro bilinear_load_8888 reg1, reg2, tmp
    mov       TMP1, X, asr #16              /* integer part of X */
    add       X, X, UX
    add       TMP1, TOP, TMP1, asl #2       /* 4 bytes per element */
    vld1.32   {reg1}, [TMP1], STRIDE        /* load, then TMP1 += STRIDE */
    vld1.32   {reg2}, [TMP1]
.endm

/*
 * Load the source data for two consecutive output pixels and blend the two
 * scanlines of each:
 *
 *   acc1 = reg1 * d28 + reg2 * d29      (u8 inputs widened to u16 lanes)
 *   acc2 = reg3 * d28 + reg4 * d29
 *
 * d28 and d29 hold the WT and WB weights replicated into every byte lane
 * (see generate_bilinear_scanline_func).  reg1..reg4 are overwritten by
 * the loads; X is advanced twice and TMP1 is clobbered by
 * bilinear_load_8888.  tmp1 and tmp2 are only forwarded to that macro,
 * which ignores them.
 */
.macro bilinear_load_and_vertical_interpolate_two_8888 \
                    acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2

    bilinear_load_8888 reg1, reg2, tmp1
    vmull.u8  acc1, reg1, d28
    vmlal.u8  acc1, reg2, d29
    bilinear_load_8888 reg3, reg4, tmp2
    vmull.u8  acc2, reg3, d28
    vmlal.u8  acc2, reg4, d29
.endm

/*
 * Store 'numpix' 32-bit results to [OUT] and advance OUT past them.
 *
 *   numpix == 4: d0 and d1, address qualified as 128-bit aligned
 *   numpix == 2: d0,        address qualified as 64-bit aligned
 *   numpix == 1: lane 0 of d0, address qualified as 32-bit aligned
 *
 * Any other value stops the assembly with an error.  The message is
 * quoted because the .error directive only accepts a string argument.
 * tmp1 and tmp2 are accepted but not used.
 */
.macro bilinear_store_8888 numpix, tmp1, tmp2
.if numpix == 4
    vst1.32   {d0, d1}, [OUT, :128]!
.elseif numpix == 2
    vst1.32   {d0}, [OUT, :64]!
.elseif numpix == 1
    vst1.32   {d0[0]}, [OUT, :32]!
.else
    .error "bilinear_store_8888 numpix is unsupported"
.endif
.endm

/*
 * Produce a single output pixel.
 *
 * Input state: d28/d29 hold the two scanline weights and d30 holds the
 * horizontal weight for this pixel (one copy per 16-bit lane).
 *
 * Steps:
 *   q1 = d0 * d28 + d1 * d29
 *        (scanline blend; d2 and d3 are the low and high halves of q1)
 *   q0 = (d2 << BILINEAR_INTERPOLATION_BITS) - d2 * d30 + d3 * d30
 *   d0 = q0 >> (2 * BILINEAR_INTERPOLATION_BITS), narrowed 32 -> 16 bits
 *        and then 16 -> 8 bits
 *
 * The result is written with bilinear_store_<dst_fmt>.  This macro does
 * not update q12/q15; the caller prepares d30 beforehand.
 * Clobbers q0, q1, TMP1 and advances X by UX.
 */
.macro bilinear_interpolate_last_pixel src_fmt, dst_fmt
    bilinear_load_&src_fmt d0, d1, d2
    vmull.u8  q1, d0, d28
    vmlal.u8  q1, d1, d29
    /* 5 cycles bubble */
    vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16 q0, d2, d30
    vmlal.u16 q0, d3, d30
    /* 5 cycles bubble */
    vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
    /* 3 cycles bubble */
    vmovn.u16 d0, q0
    /* 1 cycle bubble */
    bilinear_store_&dst_fmt 1, q2, q3
.endm

/*
 * Produce two output pixels.
 *
 * On entry d30 and d31 hold the horizontal weights of the first and the
 * second pixel, and q12 holds the fractional coordinates of the pair that
 * follows.  The same relationship holds again on exit: q15 is recomputed
 * from q12 and q12 is advanced by q13.
 *
 * q1 and q11 receive the scanline-blended data of the two pixels (their
 * halves are d2/d3 and d22/d23); each pixel is then blended horizontally
 * exactly as in bilinear_interpolate_last_pixel and both results are
 * narrowed into d0 before being stored.
 * Clobbers q0, q1, q10, q11, TMP1 and advances X by 2 * UX.
 */
.macro bilinear_interpolate_two_pixels src_fmt, dst_fmt
    bilinear_load_and_vertical_interpolate_two_&src_fmt \
                q1, q11, d0, d1, d20, d21, d22, d23
    vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16 q0, d2, d30
    vmlal.u16 q0, d3, d30
    vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16 q10, d22, d31
    vmlal.u16 q10, d23, d31
    vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
    /* weights for the next pair, then step the coordinates */
    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vadd.u16  q12, q12, q13
    vmovn.u16 d0, q0
    bilinear_store_&dst_fmt 2, q2, q3
.endm

/*
 * Produce four output pixels (generic, non-pipelined implementation).
 *
 * On entry d30/d31 hold the horizontal weights of pixels 0 and 1 and q12
 * holds the fractional coordinates of pixels 2 and 3.  q15 is refreshed
 * from q12 in the middle of the macro for pixels 2 and 3, and once more at
 * the end, so that on exit q15/q12 are again set up for whatever follows.
 *
 * Scanline-blended data is placed in q1, q11 (pixels 0, 1) and q3, q9
 * (pixels 2, 3); the horizontal blend results are narrowed into d0/d1 and
 * stored with bilinear_store_<dst_fmt>.
 * Clobbers q0-q3, q8-q11, TMP1 and advances X by 4 * UX.
 */
.macro bilinear_interpolate_four_pixels src_fmt, dst_fmt
    bilinear_load_and_vertical_interpolate_four_&src_fmt \
                q1, q11, d0, d1, d20, d21, d22, d23 \
                q3, q9,  d4, d5, d16, d17, d18, d19
    /* TMP1 still holds the address of the most recent second-row load */
    pld       [TMP1, PF_OFFS]
    sub       TMP1, TMP1, STRIDE
    /* pixels 0 and 1 */
    vshll.u16 q0, d2, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16 q0, d2, d30
    vmlal.u16 q0, d3, d30
    vshll.u16 q10, d22, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16 q10, d22, d31
    vmlal.u16 q10, d23, d31
    /* weights for pixels 2 and 3 */
    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vshll.u16 q2, d6, #BILINEAR_INTERPOLATION_BITS
    vmlsl.u16 q2, d6, d30
    vmlal.u16 q2, d7, d30
    vshll.u16 q8, d18, #BILINEAR_INTERPOLATION_BITS
    /*
     * NOTE(review): none of the 8888 load macros visible in this file
     * writes TMP2, so for src_fmt == 8888 this prefetch is based on
     * whatever r4 holds (WB on function entry).  The preceding
     * "sub TMP1, TMP1, STRIDE" suggests TMP1 may have been intended -
     * confirm before changing.
     */
    pld       [TMP2, PF_OFFS]
    vmlsl.u16 q8, d18, d31
    vmlal.u16 q8, d19, d31
    vadd.u16  q12, q12, q13
    vshrn.u32 d0, q0, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32 d1, q10, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32 d4, q2, #(2 * BILINEAR_INTERPOLATION_BITS)
    vshrn.u32 d5, q8, #(2 * BILINEAR_INTERPOLATION_BITS)
    /* weights and coordinates for the pixels after this group */
    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vmovn.u16 d0, q0
    vmovn.u16 d1, q2
    vadd.u16  q12, q12, q13
    bilinear_store_&dst_fmt 4, q2, q3
.endm

/*
 * "Head" stage of the four-pixel loop.
 *
 * If the symbol have_bilinear_interpolate_four_pixels_<src_fmt>_<dst_fmt>
 * is defined, a specialised ..._<src_fmt>_<dst_fmt>_head macro is expanded.
 * Otherwise the generic bilinear_interpolate_four_pixels is used, which
 * processes and stores a complete group of four pixels.
 */
.macro bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_head
.else
    bilinear_interpolate_four_pixels src_fmt, dst_fmt
.endif
.endm

/*
 * "Tail" stage of the four-pixel loop.
 *
 * Expands the specialised ..._<src_fmt>_<dst_fmt>_tail macro when
 * have_bilinear_interpolate_four_pixels_<src_fmt>_<dst_fmt> is defined.
 * Without a specialised implementation it expands to nothing, because the
 * generic head / tail_head stages already emit complete four-pixel groups.
 */
.macro bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail
.endif
.endm

/*
 * Combined "tail + head" stage used as the body of the four-pixel loop.
 *
 * Expands the specialised ..._<src_fmt>_<dst_fmt>_tail_head macro when
 * have_bilinear_interpolate_four_pixels_<src_fmt>_<dst_fmt> is defined,
 * and the generic bilinear_interpolate_four_pixels otherwise.
 */
.macro bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail_head
.else
    bilinear_interpolate_four_pixels src_fmt, dst_fmt
.endif
.endm

/*
 * Load and scanline-blend the source data for four consecutive output
 * pixels by expanding bilinear_load_and_vertical_interpolate_two_8888
 * twice: first with the x* argument group (pixels 0 and 1), then with the
 * y* argument group (pixels 2 and 3).  Each group has the same meaning as
 * the arguments of the two-pixel macro.
 */
.macro bilinear_load_and_vertical_interpolate_four_8888 \
                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi

    bilinear_load_and_vertical_interpolate_two_8888 \
                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
    bilinear_load_and_vertical_interpolate_two_8888 \
                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
.endm

/*
 * Emit a complete bilinear scanline scaling function called 'fname'.
 *
 * Macro arguments:
 *   fname              symbol name of the generated function
 *   src_fmt, dst_fmt   suffixes selecting the bilinear_load_... and
 *                      bilinear_store_... macros
 *   src_bpp_shift      used to scale the prefetch offset
 *   dst_bpp_shift      used for the destination alignment tests
 *   prefetch_distance  multiplied by UX to form the prefetch offset
 *   flags              combination of BILINEAR_FLAG_* bits
 *
 * Arguments of the generated function as read by the code below:
 *   r0 = OUT, r1 = TOP, r2 = BOTTOM, r3 = WT,
 *   stack (in ascending address order) = WB, X, UX, WIDTH
 * NOTE(review): presumably OUT is the destination pointer, TOP/BOTTOM the
 * two source scanlines, WT/WB their weights and X/UX a 16.16 fixed-point
 * start position and step - confirm against the C caller.
 *
 * Register aliasing: TMP1 shares r3 with WT, TMP2 shares r4 with WB and
 * STRIDE shares r2 with BOTTOM.  WT and WB are copied into d28/d29 before
 * TMP1 is first written, and BOTTOM is replaced by STRIDE = BOTTOM - TOP.
 *
 * NEON register roles:
 *   d28, d29   WT and WB replicated into every byte lane
 *   q12        low 16 bits of the X coordinate for the next two pixels
 *              (d24: first pixel, d25: second pixel)
 *   q13        coordinate step (UX, doubled once pairs are processed)
 *   q15        horizontal weights of the current two pixels (d30, d31)
 *
 * r4-r9 are saved and restored; d8-d15 are saved and restored only when
 * BILINEAR_FLAG_USE_ALL_NEON_REGS is set.
 */
.macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \
                                       src_bpp_shift, dst_bpp_shift, \
                                       prefetch_distance, flags

pixman_asm_function fname
    OUT       .req      r0
    TOP       .req      r1
    BOTTOM    .req      r2
    WT        .req      r3
    WB        .req      r4
    X         .req      r5
    UX        .req      r6
    WIDTH     .req      ip
    TMP1      .req      r3
    TMP2      .req      r4
    PF_OFFS   .req      r7
    TMP3      .req      r8
    TMP4      .req      r9
    STRIDE    .req      r2

    /* ip = address of the first stack argument; it is overwritten with
     * WIDTH by the ldmia below */
    mov       ip, sp
    push      {r4, r5, r6, r7, r8, r9}
    mov       PF_OFFS, #prefetch_distance
    ldmia     ip, {WB, X, UX, WIDTH}
    mul       PF_OFFS, PF_OFFS, UX

.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
    vpush     {d8-d15}
.endif

    sub       STRIDE, BOTTOM, TOP
    .unreq    BOTTOM

    /* nothing to do for WIDTH <= 0 */
    cmp       WIDTH, #0
    ble       3f

    vdup.u16  q12, X
    vdup.u16  q13, UX
    vdup.u8   d28, WT
    vdup.u8   d29, WB
    /* d25 (upper half of q12) now corresponds to the second pixel */
    vadd.u16  d25, d25, d26

    /* ensure good destination alignment: emit one pixel if the
     * (1 << dst_bpp_shift) bit of OUT is set */
    cmp       WIDTH, #1
    blt       0f
    tst       OUT, #(1 << dst_bpp_shift)
    beq       0f
    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vadd.u16  q12, q12, q13
    bilinear_interpolate_last_pixel src_fmt, dst_fmt
    sub       WIDTH, WIDTH, #1
0:
    /* from here on pixels are handled in pairs: double the step, compute
     * the weights of the first pair and move q12 on to the next pair */
    vadd.u16  q13, q13, q13
    vshr.u16  q15, q12, #(16 - BILINEAR_INTERPOLATION_BITS)
    vadd.u16  q12, q12, q13

    /* emit two pixels if the next higher address bit of OUT is set */
    cmp       WIDTH, #2
    blt       0f
    tst       OUT, #(1 << (dst_bpp_shift + 1))
    beq       0f
    bilinear_interpolate_two_pixels src_fmt, dst_fmt
    sub       WIDTH, WIDTH, #2
0:
.if ((flags) & BILINEAR_FLAG_UNROLL_8) != 0
/*********** 8 pixels per iteration *****************/
    /* one more alignment step: emit four pixels if needed */
    cmp       WIDTH, #4
    blt       0f
    tst       OUT, #(1 << (dst_bpp_shift + 2))
    beq       0f
    bilinear_interpolate_four_pixels src_fmt, dst_fmt
    sub       WIDTH, WIDTH, #4
0:
    /* WIDTH is kept biased by -8 while the main loop runs */
    subs      WIDTH, WIDTH, #8
    blt       1f
    mov       PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
    bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt
    subs      WIDTH, WIDTH, #8
    blt       5f
0:
    bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt
    subs      WIDTH, WIDTH, #8
    bge       0b
5:
    bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt
1:
    /* WIDTH is negative here, but its low bits still equal those of the
     * number of pixels left */
    tst       WIDTH, #4
    beq       2f
    bilinear_interpolate_four_pixels src_fmt, dst_fmt
2:
.else
/*********** 4 pixels per iteration *****************/
    /* WIDTH is kept biased by -4 while the main loop runs */
    subs      WIDTH, WIDTH, #4
    blt       1f
    mov       PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
    bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
    subs      WIDTH, WIDTH, #4
    blt       5f
0:
    bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
    subs      WIDTH, WIDTH, #4
    bge       0b
5:
    bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
1:
/****************************************************/
.endif
    /* handle the remaining trailing pixels; WIDTH is negative here, but
     * its two low bits still equal those of the number of pixels left */
    tst       WIDTH, #2
    beq       2f
    bilinear_interpolate_two_pixels src_fmt, dst_fmt
2:
    tst       WIDTH, #1
    beq       3f
    bilinear_interpolate_last_pixel src_fmt, dst_fmt
3:
.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
    vpop      {d8-d15}
.endif
    pop       {r4, r5, r6, r7, r8, r9}
    bx        lr

    .unreq    OUT
    .unreq    TOP
    .unreq    WT
    .unreq    WB
    .unreq    X
    .unreq    UX
    .unreq    WIDTH
    .unreq    TMP1
    .unreq    TMP2
    .unreq    PF_OFFS
    .unreq    TMP3
    .unreq    TMP4
    .unreq    STRIDE
.endfunc

.endm

/*
 * Instantiate the 8888 -> 8888 scanline function: 8888 load/store macros,
 * source and destination bpp shift of 2, prefetch distance 28, and the
 * four-pixels-per-iteration main loop (no d8-d15 save/restore).
 */
generate_bilinear_scanline_func \
    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, \
    2, 2, 28, BILINEAR_FLAG_UNROLL_4