1b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
2b50c217251b086440efcdb273c22f86a06c80cbaChris Craik/* filter_neon.S - NEON optimised filter functions
3b50c217251b086440efcdb273c22f86a06c80cbaChris Craik *
4b50c217251b086440efcdb273c22f86a06c80cbaChris Craik * Copyright (c) 2013 Glenn Randers-Pehrson
5b50c217251b086440efcdb273c22f86a06c80cbaChris Craik * Written by Mans Rullgard, 2011.
6b478e66e7c2621eef5f465e4629ce642db00716bSireesh Tripurari * Last changed in libpng 1.6.8 [December 19, 2013]
7b50c217251b086440efcdb273c22f86a06c80cbaChris Craik *
8b50c217251b086440efcdb273c22f86a06c80cbaChris Craik * This code is released under the libpng license.
9b50c217251b086440efcdb273c22f86a06c80cbaChris Craik * For conditions of distribution and use, see the disclaimer
10b50c217251b086440efcdb273c22f86a06c80cbaChris Craik * and license in png.h
11b50c217251b086440efcdb273c22f86a06c80cbaChris Craik */
12b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
13b50c217251b086440efcdb273c22f86a06c80cbaChris Craik/* This is required to get the symbol renames, which are #defines, and also
14b478e66e7c2621eef5f465e4629ce642db00716bSireesh Tripurari * includes the definition (or not) of PNG_ARM_NEON_OPT and
15b478e66e7c2621eef5f465e4629ce642db00716bSireesh Tripurari * PNG_ARM_NEON_IMPLEMENTATION.
16b50c217251b086440efcdb273c22f86a06c80cbaChris Craik */
17b50c217251b086440efcdb273c22f86a06c80cbaChris Craik#define PNG_VERSION_INFO_ONLY
18b50c217251b086440efcdb273c22f86a06c80cbaChris Craik#include "../pngpriv.h"
19b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
20b50c217251b086440efcdb273c22f86a06c80cbaChris Craik#if defined(__linux__) && defined(__ELF__)
21b50c217251b086440efcdb273c22f86a06c80cbaChris Craik.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
22b50c217251b086440efcdb273c22f86a06c80cbaChris Craik#endif
23b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
24b478e66e7c2621eef5f465e4629ce642db00716bSireesh Tripurari/* Assembler NEON support - only works for 32-bit ARM (i.e. it does not work for
25b478e66e7c2621eef5f465e4629ce642db00716bSireesh Tripurari * ARM64).  The code in arm/filter_neon_intrinsics.c supports ARM64, however it
26b478e66e7c2621eef5f465e4629ce642db00716bSireesh Tripurari * only works if -mfpu=neon is specified on the GCC command line.  See pngpriv.h
27b478e66e7c2621eef5f465e4629ce642db00716bSireesh Tripurari * for the logic which sets PNG_USE_ARM_NEON_ASM:
28b478e66e7c2621eef5f465e4629ce642db00716bSireesh Tripurari */
29b478e66e7c2621eef5f465e4629ce642db00716bSireesh Tripurari#if PNG_ARM_NEON_IMPLEMENTATION == 2 /* hand-coded assembler */
30b478e66e7c2621eef5f465e4629ce642db00716bSireesh Tripurari
31b50c217251b086440efcdb273c22f86a06c80cbaChris Craik#ifdef PNG_READ_SUPPORTED
32b50c217251b086440efcdb273c22f86a06c80cbaChris Craik#if PNG_ARM_NEON_OPT > 0
33b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
/* ELF is written in front of directives that are only meaningful for ELF
 * output (the .type and .size lines in the func macro).  When __ELF__ is
 * defined it expands to nothing, so the directive is assembled; otherwise it
 * expands to '@', which starts a comment in ARM GNU assembler syntax and so
 * hides the remainder of that line.
 */
34b50c217251b086440efcdb273c22f86a06c80cbaChris Craik#ifdef __ELF__
35b50c217251b086440efcdb273c22f86a06c80cbaChris Craik#   define ELF
36b50c217251b086440efcdb273c22f86a06c80cbaChris Craik#else
37b50c217251b086440efcdb273c22f86a06c80cbaChris Craik#   define ELF @
38b50c217251b086440efcdb273c22f86a06c80cbaChris Craik#endif
39b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
/* Everything below is assembled for ARMv7-A with the NEON unit enabled. */
40b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        .arch armv7-a
41b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        .fpu  neon
42b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
/* func name, export=0
 *
 * Opens a function called 'name' in the .text section:
 *   - the entry point is aligned to 4 bytes,
 *   - the symbol is made global when 'export' is non-zero,
 *   - on ELF targets the symbol is typed as a function (STT_FUNC),
 *   - a .func region is started and the entry label is emitted.
 *
 * It also defines a one-shot 'endfunc' macro.  Invoking endfunc records the
 * symbol size (ELF only), closes the .func region and then purges itself so
 * that the next use of 'func' can define it again.  Every 'func' must
 * therefore be paired with exactly one 'endfunc'.
 */
43b50c217251b086440efcdb273c22f86a06c80cbaChris Craik.macro  func    name, export=0
44b50c217251b086440efcdb273c22f86a06c80cbaChris Craik    .macro endfunc
45b50c217251b086440efcdb273c22f86a06c80cbaChris CraikELF     .size   \name, . - \name
46b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        .endfunc
47b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        .purgem endfunc
48b50c217251b086440efcdb273c22f86a06c80cbaChris Craik    .endm
49b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        .text

        /* Explicitly request 4-byte alignment (2^2 on ARM) for the entry
         * point: some versions of GAS do not align code correctly on their
         * own.  This is harmless with assemblers that already do.
         */
        .align 2

50b50c217251b086440efcdb273c22f86a06c80cbaChris Craik    .if \export
51b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        .global \name
52b50c217251b086440efcdb273c22f86a06c80cbaChris Craik    .endif
53b50c217251b086440efcdb273c22f86a06c80cbaChris CraikELF     .type   \name, STT_FUNC
54b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        .func   \name
55b50c217251b086440efcdb273c22f86a06c80cbaChris Craik\name:
56b50c217251b086440efcdb273c22f86a06c80cbaChris Craik.endm
57b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
/* png_read_filter_row_sub4_neon
 *
 * Rewrites a row of 4-byte pixels in place so that every pixel becomes the
 * bytewise (modulo 256) sum of itself and the already rewritten pixel to its
 * left, i.e. a running sum along the row.  The name indicates this is the
 * reconstruction step for the PNG "Sub" filter at 4 bytes per pixel.
 *
 * In:   r0 = pointer to a structure whose word at offset 4 is the row length
 *            in bytes (presumably libpng's png_row_info - confirm in png.h)
 *       r1 = row data; the :128 qualifiers require 16-byte alignment
 * Uses: r3 (bytes remaining), d0-d7, condition flags
 *
 * The loop body always executes at least once and handles 16 bytes (four
 * pixels) per pass, so a length that is not a multiple of 16 is effectively
 * rounded up.  NOTE(review): this presumably relies on the caller padding
 * the row buffer - verify against the caller.
 */
58b50c217251b086440efcdb273c22f86a06c80cbaChris Craikfunc    png_read_filter_row_sub4_neon, export=1
59b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        ldr             r3,  [r0, #4]           @ rowbytes
        @ d3 carries the previous output pixel; it is zero before the first
60b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vmov.i8         d3,  #0
61b50c217251b086440efcdb273c22f86a06c80cbaChris Craik1:
        @ Load four 32-bit pixels; pixel n is replicated into all lanes of
        @ d4+n.  No writeback here: the same address is stored to below.
62b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vld4.32         {d4[],d5[],d6[],d7[]},    [r1,:128]
        @ Serial chain: each output is the previous output plus the next
        @ input, ending in d3 which feeds the next pass.
63b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d0,  d3,  d4
64b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d1,  d0,  d5
65b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d2,  d1,  d6
66b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d3,  d2,  d7
        @ Write lane 0 of d0-d3 (16 bytes) over the input and advance r1
67b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst4.32         {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
68b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        subs            r3,  r3,  #16
        @ Signed test: repeat while the remaining byte count is above zero
69b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        bgt             1b
70b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
71b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        bx              lr
72b50c217251b086440efcdb273c22f86a06c80cbaChris Craikendfunc
73b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
/* png_read_filter_row_sub3_neon
 *
 * Rewrites a row of 3-byte pixels in place so that every pixel becomes the
 * bytewise (modulo 256) sum of itself and the already rewritten pixel to its
 * left.  The name indicates this is the reconstruction step for the PNG
 * "Sub" filter at 3 bytes per pixel.
 *
 * In:   r0 = pointer to a structure whose word at offset 4 is the row length
 *            in bytes (presumably libpng's png_row_info - confirm in png.h)
 *       r1 = row data; the first store of each pass carries a :32 qualifier,
 *            so it must be 4-byte aligned
 *       r2 = not read; it is overwritten with a constant
 * Uses: r0 (read pointer), r2, r3 (bytes remaining), r12, d0-d3, d5-d7,
 *       q11, condition flags
 *
 * Each pass handles 12 bytes (four pixels).  Only bytes 0-2 of d0-d3 and
 * d5-d7 are meaningful; the upper bytes hold leftovers.  Input is read 16
 * bytes at a time and one group ahead, and the last store of a pass writes
 * one byte beyond its group.  NOTE(review): at the end of the row this reads
 * and writes past rowbytes, presumably covered by padding in the caller's
 * buffer - verify against the caller.
 */
74b50c217251b086440efcdb273c22f86a06c80cbaChris Craikfunc    png_read_filter_row_sub3_neon, export=1
75b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        ldr             r3,  [r0, #4]           @ rowbytes
        @ d3 carries the previous output pixel; it is zero before the first
76b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vmov.i8         d3,  #0
        @ r0 becomes a separate read pointer; r1 stays the write pointer
77b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        mov             r0,  r1
        @ r2 = write stride (one 3-byte pixel), r12 = read stride (4 pixels)
78b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        mov             r2,  #3
79b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        mov             r12, #12
        @ Prime q11 (d22 low half, d23 high half) with the first 16 bytes
80b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vld1.8          {q11},    [r0], r12
81b50c217251b086440efcdb273c22f86a06c80cbaChris Craik1:
        @ Pixel 0 is already at the bottom of d22.  The vext instructions
        @ shift pixels 1, 2 and 3 (q11 bytes 3, 6 and 9 onwards) down to
        @ byte 0 of d5, d6 and d7.
82b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vext.8          d5,  d22, d23, #3
83b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d0,  d3,  d22
84b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vext.8          d6,  d22, d23, #6
85b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d1,  d0,  d5
86b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vext.8          d7,  d23, d23, #1
        @ Fetch the next group now, before any store: each store below
        @ writes 4 bytes but advances only 3, so the last one overwrites
        @ the first byte of the next group in memory.
87b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vld1.8          {q11},    [r0], r12
88b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst1.32         {d0[0]},  [r1,:32], r2
89b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d2,  d1,  d6
90b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst1.32         {d1[0]},  [r1], r2
        @ d3 is both the fourth output and the left pixel for the next pass
91b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d3,  d2,  d7
92b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst1.32         {d2[0]},  [r1], r2
93b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst1.32         {d3[0]},  [r1], r2
94b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        subs            r3,  r3,  #12
        @ Signed test: repeat while the remaining byte count is above zero
95b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        bgt             1b
96b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
97b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        bx              lr
98b50c217251b086440efcdb273c22f86a06c80cbaChris Craikendfunc
99b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
/* png_read_filter_row_up_neon
 *
 * Adds, bytewise and modulo 256, the buffer at r2 to the buffer at r1 and
 * stores the result back at r1.  The name indicates this is the
 * reconstruction step for the PNG "Up" filter.
 *
 * In:   r0 = pointer to a structure whose word at offset 4 is the row length
 *            in bytes (presumably libpng's png_row_info - confirm in png.h)
 *       r1 = row data, updated in place
 *       r2 = second operand (presumably the previous row - confirm in the
 *            caller); it is only read
 *       Both pointers carry :128 qualifiers and must be 16-byte aligned.
 * Uses: r3 (bytes remaining), q0, q1, condition flags
 *
 * The loop body always executes at least once and handles 16 bytes per pass,
 * so a length that is not a multiple of 16 is effectively rounded up.
 * NOTE(review): presumably relies on padded buffers - verify in the caller.
 */
100b50c217251b086440efcdb273c22f86a06c80cbaChris Craikfunc    png_read_filter_row_up_neon, export=1
101b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        ldr             r3,  [r0, #4]           @ rowbytes
102b50c217251b086440efcdb273c22f86a06c80cbaChris Craik1:
        @ r1 is not advanced by the load; the store below writes it back
103b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vld1.8          {q0}, [r1,:128]
104b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vld1.8          {q1}, [r2,:128]!
105b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         q0,  q0,  q1
106b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst1.8          {q0}, [r1,:128]!
107b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        subs            r3,  r3,  #16
        @ Signed test: repeat while the remaining byte count is above zero
108b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        bgt             1b
109b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
110b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        bx              lr
111b50c217251b086440efcdb273c22f86a06c80cbaChris Craikendfunc
112b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
/* png_read_filter_row_avg4_neon
 *
 * Rewrites a row of 4-byte pixels in place.  Each output pixel is
 *     input + ((left + above) >> 1)          (bytewise, modulo 256)
 * where 'left' is the previous output pixel (zero for the first one) and
 * 'above' is the pixel at the same position in the buffer at r2.  The name
 * indicates this is the reconstruction step for the PNG "Average" filter at
 * 4 bytes per pixel.
 *
 * In:   r0 = pointer to a structure whose word at offset 4 is the row length
 *            in bytes (presumably libpng's png_row_info - confirm in png.h)
 *       r1 = row data, updated in place
 *       r2 = second buffer (presumably the previous row - confirm in the
 *            caller); it is only read
 *       Both pointers carry :128 qualifiers and must be 16-byte aligned.
 * Uses: r12 (bytes remaining), d0-d7, d16-d19, condition flags
 *
 * The loop body always executes at least once and handles 16 bytes (four
 * pixels) per pass.  NOTE(review): lengths that are not a multiple of 16
 * presumably rely on padded buffers - verify in the caller.
 */
113b50c217251b086440efcdb273c22f86a06c80cbaChris Craikfunc    png_read_filter_row_avg4_neon, export=1
114b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        ldr             r12, [r0, #4]           @ rowbytes
        @ d3 carries the previous output pixel; it is zero before the first
115b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vmov.i8         d3,  #0
116b50c217251b086440efcdb273c22f86a06c80cbaChris Craik1:
        @ d4-d7 = four input pixels, d16-d19 = the four pixels above them,
        @ each replicated across its register
117b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vld4.32         {d4[],d5[],d6[],d7[]},    [r1,:128]
118b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vld4.32         {d16[],d17[],d18[],d19[]},[r2,:128]!
        @ vhadd is a halving add, (x + y) >> 1, that keeps the carry of
        @ the intermediate sum.  Each pair below forms the average of left
        @ and above, then adds the input pixel.
119b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vhadd.u8        d0,  d3,  d16
120b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d0,  d0,  d4
121b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vhadd.u8        d1,  d0,  d17
122b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d1,  d1,  d5
123b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vhadd.u8        d2,  d1,  d18
124b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d2,  d2,  d6
125b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vhadd.u8        d3,  d2,  d19
126b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d3,  d3,  d7
        @ Write lane 0 of d0-d3 (16 bytes) over the input and advance r1
127b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst4.32         {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
128b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        subs            r12, r12, #16
        @ Signed test: repeat while the remaining byte count is above zero
129b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        bgt             1b
130b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
131b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        bx              lr
132b50c217251b086440efcdb273c22f86a06c80cbaChris Craikendfunc
133b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
/* png_read_filter_row_avg3_neon
 *
 * Rewrites a row of 3-byte pixels in place.  Each output pixel is
 *     input + ((left + above) >> 1)          (bytewise, modulo 256)
 * where 'left' is the previous output pixel (zero for the first one) and
 * 'above' is the pixel at the same position in the buffer at r2.  The name
 * indicates this is the reconstruction step for the PNG "Average" filter at
 * 3 bytes per pixel.
 *
 * In:   r0 = pointer to a structure whose word at offset 4 is the row length
 *            in bytes (presumably libpng's png_row_info - confirm in png.h)
 *       r1 = row data, updated in place; the first store of each pass has a
 *            :32 qualifier, so it must be 4-byte aligned
 *       r2 = second buffer (presumably the previous row - confirm in the
 *            caller); it is only read
 * Uses: r0 (read pointer), r12 (bytes remaining), r4 and lr as constants
 *       (both saved and restored on the stack), d0-d3, d5-d7, d17-d19, q10,
 *       q11, condition flags
 *
 * Each pass handles 12 bytes (four pixels); only bytes 0-2 of the pixel
 * registers are meaningful.  Both buffers are read 16 bytes at a time, the
 * row one group ahead, and the last store of a pass writes one byte beyond
 * its group.  NOTE(review): at the end of the row this reads and writes past
 * rowbytes, presumably covered by buffer padding - verify in the caller.
 */
134b50c217251b086440efcdb273c22f86a06c80cbaChris Craikfunc    png_read_filter_row_avg3_neon, export=1
        @ r4 and lr are used as scratch below, so keep copies on the stack
135b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        push            {r4,lr}
136b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        ldr             r12, [r0, #4]           @ rowbytes
        @ d3 carries the previous output pixel; it is zero before the first
137b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vmov.i8         d3,  #0
        @ r0 becomes a separate read pointer; r1 stays the write pointer
138b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        mov             r0,  r1
        @ r4 = write stride (one 3-byte pixel), lr = read stride (4 pixels)
139b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        mov             r4,  #3
140b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        mov             lr,  #12
        @ Prime q11 (d22 low half, d23 high half) with the first 16 row bytes
141b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vld1.8          {q11},    [r0], lr
142b50c217251b086440efcdb273c22f86a06c80cbaChris Craik1:
        @ q10 (d20 low half, d21 high half) = the matching 16 bytes above
143b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vld1.8          {q10},    [r2], lr
        @ Pixel 0 of each buffer is already at the bottom of d22 and d20.
        @ The vext instructions bring input pixels 1-3 down into d5, d6, d7
        @ and the pixels above them into d17, d18, d19.  They are interleaved
        @ with the vhadd and vadd pairs that form the outputs d0-d3.
144b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vext.8          d5,  d22, d23, #3
145b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vhadd.u8        d0,  d3,  d20
146b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vext.8          d17, d20, d21, #3
147b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d0,  d0,  d22
148b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vext.8          d6,  d22, d23, #6
149b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vhadd.u8        d1,  d0,  d17
150b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vext.8          d18, d20, d21, #6
151b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d1,  d1,  d5
152b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vext.8          d7,  d23, d23, #1
        @ Fetch the next row group now, before any store: each store below
        @ writes 4 bytes but advances only 3, so the last one overwrites
        @ the first byte of the next group in memory.
153b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vld1.8          {q11},    [r0], lr
154b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst1.32         {d0[0]},  [r1,:32], r4
155b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vhadd.u8        d2,  d1,  d18
156b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst1.32         {d1[0]},  [r1], r4
157b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vext.8          d19, d21, d21, #1
158b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d2,  d2,  d6
159b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vhadd.u8        d3,  d2,  d19
160b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst1.32         {d2[0]},  [r1], r4
        @ d3 is both the fourth output and the left pixel for the next pass
161b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d3,  d3,  d7
162b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst1.32         {d3[0]},  [r1], r4
163b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        subs            r12, r12, #12
        @ Signed test: repeat while the remaining byte count is above zero
164b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        bgt             1b
165b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
        @ Restore r4 and return by loading the saved lr straight into pc
166b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        pop             {r4,pc}
167b50c217251b086440efcdb273c22f86a06c80cbaChris Craikendfunc
168b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
/* paeth rx, ra, rb, rc
 *
 * Per byte lane, selects one of the three inputs and writes it to rx:
 *     pa = |rb - rc|,  pb = |ra - rc|,  pc = |ra + rb - 2*rc|
 *     rx = ra   if pa <= pb and pa <= pc
 *          rb   else if pb <= pc
 *          rc   otherwise
 * The distances are computed in 16-bit lanes so that they cannot wrap.
 *
 * All four operands are D registers.  rx is written before ra, rb and rc are
 * read for the final selects, so it must not be the same register as any of
 * them.  q12-q15 are used as scratch and are overwritten; the operands must
 * therefore not live in d24-d31.
 */
169b50c217251b086440efcdb273c22f86a06c80cbaChris Craik.macro  paeth           rx,  ra,  rb,  rc
170b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vaddl.u8        q12, \ra, \rb           @ a + b
171b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vaddl.u8        q15, \rc, \rc           @ 2*c
172b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vabdl.u8        q13, \rb, \rc           @ pa
173b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vabdl.u8        q14, \ra, \rc           @ pb
174b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vabd.u16        q15, q12, q15           @ pc
175b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vcle.u16        q12, q13, q14           @ pa <= pb
176b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vcle.u16        q13, q13, q15           @ pa <= pc
177b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vcle.u16        q14, q14, q15           @ pb <= pc
178b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vand            q12, q12, q13           @ pa <= pb && pa <= pc
        @ Narrow both 16-bit masks to byte masks.  d28 is the low half of
        @ q14, so the first narrowing overwrites part of its own source.
179b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vmovn.u16       d28, q14
180b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vmovn.u16       \rx, q12
        @ vbsl keeps the first source where the mask in the destination is
        @ set and the second where it is clear: d28 = b or c, then rx = a
        @ or d28.
181b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vbsl            d28, \rb, \rc
182b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vbsl            \rx, \ra, d28
183b50c217251b086440efcdb273c22f86a06c80cbaChris Craik.endm
184b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
/* png_read_filter_row_paeth4_neon
 *
 * Rewrites a row of 4-byte pixels in place.  Each output pixel is the input
 * pixel plus (bytewise, modulo 256) the value chosen by the paeth macro from
 *     a = the previous output pixel (left),
 *     b = the pixel at the same position in the buffer at r2 (above),
 *     c = the pixel before b in that buffer (upper left),
 * with a and c taken as zero for the first pixel.  The name indicates this
 * is the reconstruction step for the PNG "Paeth" filter at 4 bytes per
 * pixel.
 *
 * In:   r0 = pointer to a structure whose word at offset 4 is the row length
 *            in bytes (presumably libpng's png_row_info - confirm in png.h)
 *       r1 = row data, updated in place
 *       r2 = second buffer (presumably the previous row - confirm in the
 *            caller); it is only read
 *       Both pointers carry :128 qualifiers and must be 16-byte aligned.
 * Uses: r12 (bytes remaining), d0-d7, d16-d20, q12-q15 (scratch of the paeth
 *       macro), condition flags
 *
 * The loop body always executes at least once and handles 16 bytes (four
 * pixels) per pass.  NOTE(review): lengths that are not a multiple of 16
 * presumably rely on padded buffers - verify in the caller.
 */
185b50c217251b086440efcdb273c22f86a06c80cbaChris Craikfunc    png_read_filter_row_paeth4_neon, export=1
186b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        ldr             r12, [r0, #4]           @ rowbytes
        @ d3 = left pixel, d20 = upper-left pixel; both zero at row start
187b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vmov.i8         d3,  #0
188b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vmov.i8         d20, #0
189b50c217251b086440efcdb273c22f86a06c80cbaChris Craik1:
        @ d4-d7 = four input pixels, d16-d19 = the four pixels above them,
        @ each replicated across its register
190b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vld4.32         {d4[],d5[],d6[],d7[]},    [r1,:128]
191b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vld4.32         {d16[],d17[],d18[],d19[]},[r2,:128]!
        @ For each pixel: predict from (left, above, upper left), then add
        @ the input.  The above pixel of one step is the upper left of the
        @ next step.
192b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        paeth           d0,  d3,  d16, d20
193b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d0,  d0,  d4
194b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        paeth           d1,  d0,  d17, d16
195b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d1,  d1,  d5
196b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        paeth           d2,  d1,  d18, d17
197b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d2,  d2,  d6
198b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        paeth           d3,  d2,  d19, d18
        @ Keep the last above pixel as the upper left for the next pass
199b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vmov            d20, d19
200b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d3,  d3,  d7
        @ Write lane 0 of d0-d3 (16 bytes) over the input and advance r1
201b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst4.32         {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
202b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        subs            r12, r12, #16
        @ Signed test: repeat while the remaining byte count is above zero
203b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        bgt             1b
204b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
205b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        bx              lr
206b50c217251b086440efcdb273c22f86a06c80cbaChris Craikendfunc
207b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
/* png_read_filter_row_paeth3_neon
 *
 * Rewrites a row of 3-byte pixels in place.  Each output pixel is the input
 * pixel plus (bytewise, modulo 256) the value chosen by the paeth macro from
 *     a = the previous output pixel (left),
 *     b = the pixel at the same position in the buffer at r2 (above),
 *     c = the pixel before b in that buffer (upper left),
 * with a and c taken as zero for the first pixel.  The name indicates this
 * is the reconstruction step for the PNG "Paeth" filter at 3 bytes per
 * pixel.
 *
 * In:   r0 = pointer to a structure whose word at offset 4 is the row length
 *            in bytes (presumably libpng's png_row_info - confirm in png.h)
 *       r1 = row data, updated in place; the first store of each pass has a
 *            :32 qualifier, so it must be 4-byte aligned
 *       r2 = second buffer (presumably the previous row - confirm in the
 *            caller); it is only read
 * Uses: r0 (read pointer), r12 (bytes remaining), r4 and lr as constants
 *       (both saved and restored on the stack), d0-d7, d17-d19, q10, q11,
 *       q12-q15 (scratch of the paeth macro), condition flags
 *
 * Each pass handles 12 bytes (four pixels); only bytes 0-2 of the pixel
 * registers are meaningful.  Both buffers are read 16 bytes at a time, the
 * row one group ahead, and the last store of a pass writes one byte beyond
 * its group.  NOTE(review): at the end of the row this reads and writes past
 * rowbytes, presumably covered by buffer padding - verify in the caller.
 */
208b50c217251b086440efcdb273c22f86a06c80cbaChris Craikfunc    png_read_filter_row_paeth3_neon, export=1
        @ r4 and lr are used as scratch below, so keep copies on the stack
209b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        push            {r4,lr}
210b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        ldr             r12, [r0, #4]           @ rowbytes
        @ d3 = left pixel, d4 = upper-left pixel; both zero at row start
211b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vmov.i8         d3,  #0
212b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vmov.i8         d4,  #0
        @ r0 becomes a separate read pointer; r1 stays the write pointer
213b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        mov             r0,  r1
        @ r4 = write stride (one 3-byte pixel), lr = read stride (4 pixels)
214b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        mov             r4,  #3
215b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        mov             lr,  #12
        @ Prime q11 (d22 low half, d23 high half) with the first 16 row bytes
216b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vld1.8          {q11},    [r0], lr
217b50c217251b086440efcdb273c22f86a06c80cbaChris Craik1:
        @ q10 (d20 low half, d21 high half) = the matching 16 bytes above
218b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vld1.8          {q10},    [r2], lr
        @ Pixel 0: left d3, above d20, upper left d4, input d22
219b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        paeth           d0,  d3,  d20, d4
220b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vext.8          d5,  d22, d23, #3
221b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d0,  d0,  d22
222b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vext.8          d17, d20, d21, #3
        @ Pixel 1: left d0, above d17, upper left d20, input d5
223b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        paeth           d1,  d0,  d17, d20
        @ Each store writes 4 bytes but advances only 3; the extra byte is
        @ rewritten by the store that follows it
224b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst1.32         {d0[0]},  [r1,:32], r4
225b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vext.8          d6,  d22, d23, #6
226b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d1,  d1,  d5
227b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vext.8          d18, d20, d21, #6
        @ Pixel 2: left d1, above d18, upper left d17, input d6
228b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        paeth           d2,  d1,  d18, d17
229b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vext.8          d7,  d23, d23, #1
        @ All four input pixels have been extracted, so q11 can be reloaded
        @ with the next group.  This must happen before the final store
        @ below, which overwrites the first byte of that group in memory.
230b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vld1.8          {q11},    [r0], lr
231b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst1.32         {d1[0]},  [r1], r4
232b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d2,  d2,  d6
233b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vext.8          d19, d21, d21, #1
        @ Pixel 3: left d2, above d19, upper left d18, input d7
234b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        paeth           d3,  d2,  d19, d18
235b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst1.32         {d2[0]},  [r1], r4
        @ Keep the last above pixel as the upper left for the next pass
236b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vmov            d4,  d19
        @ d3 is both the fourth output and the left pixel for the next pass
237b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vadd.u8         d3,  d3,  d7
238b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        vst1.32         {d3[0]},  [r1], r4
239b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        subs            r12, r12, #12
        @ Signed test: repeat while the remaining byte count is above zero
240b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        bgt             1b
241b50c217251b086440efcdb273c22f86a06c80cbaChris Craik
        @ Restore r4 and return by loading the saved lr straight into pc
242b50c217251b086440efcdb273c22f86a06c80cbaChris Craik        pop             {r4,pc}
243b50c217251b086440efcdb273c22f86a06c80cbaChris Craikendfunc
244b50c217251b086440efcdb273c22f86a06c80cbaChris Craik#endif /* PNG_ARM_NEON_OPT > 0 */
245b50c217251b086440efcdb273c22f86a06c80cbaChris Craik#endif /* PNG_READ_SUPPORTED */
246b478e66e7c2621eef5f465e4629ce642db00716bSireesh Tripurari#endif /* PNG_ARM_NEON_IMPLEMENTATION == 2 (assembler) */
247