/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
17ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
18ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
19ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams#include <machine/cpu-features.h>
20ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams#include <machine/asm.h>
21ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
 * rsdIntrinsicConvolve3x3_K
 *
 * 3x3 convolution over three source rows of 4-byte pixels, producing two
 * output pixels (8 bytes) per loop iteration.
 *
 *      r0       = dst
 *      r1       = y0 base pointer
 *      r2       = y1 base pointer
 *      r3       = y2 base pointer
 *      [sp]     = coeffs      (signed 16-bit values; 16 are loaded into
 *                              q0/q1, the first 9 are used)
 *      [sp, #4] = length / 2  (loop iteration count)
 *
 * The count is decremented and tested at the bottom of the loop, so it
 * must be non-zero on entry.
 */
ENTRY(rsdIntrinsicConvolve3x3_K)
        push            {r4-r8, r10, r11, lr}   /* 8 core registers: 32 bytes */
        vpush           {q4-q7}                 /* overwritten below: 64 bytes */

        /* The stack arguments now sit 32+64 bytes above sp.  Get the
           coeffs pointer and load the coefficients into q0, q1. */
        ldr             r4, [sp, #32+64]
        vld1.16         {q0, q1}, [r4]

        /* Get count from the stack */
        ldr             r4, [sp, #36+64]

        /* Load the frequently used immediate in a register */
        mov             r5, #8

1:
        /* Load 16 bytes (four pixels) from each row but advance the row
           pointers by only r5 = 8 bytes (two pixels): each pair of output
           pixels needs a four pixel wide window. */
        vld1.8          {q13}, [r1], r5
        vld1.8          {q14}, [r2], r5
        vld1.8          {q15}, [r3], r5

        /* Signal memory for data that will be used in the loop after the next */
        PLD             (r1, r5)
        PLD             (r2, r5)
        PLD             (r3, r5)

        /* Widen every byte to 16 bits */
        vmovl.u8        q2, d26
        vmovl.u8        q3, d27
        vmovl.u8        q4, d28
        vmovl.u8        q5, d29
        vmovl.u8        q6, d30
        vmovl.u8        q7, d31

/*
        The two pixel source array is (one d register per pixel)
        d4,  d5,  d6,  d7       row y0
        d8,  d9,  d10, d11      row y1
        d12, d13, d14, d15      row y2
*/

        /* First output pixel: window columns 0..2, 32-bit sums in q8 */
        vmull.s16       q8, d4, d0[0]
        vmlal.s16       q8, d5, d0[1]
        vmlal.s16       q8, d6, d0[2]
        vmlal.s16       q8, d8, d0[3]
        vmlal.s16       q8, d9, d1[0]
        vmlal.s16       q8, d10, d1[1]
        vmlal.s16       q8, d12, d1[2]
        vmlal.s16       q8, d13, d1[3]
        vmlal.s16       q8, d14, d2[0]

        /* Second output pixel: window columns 1..3, 32-bit sums in q9 */
        vmull.s16       q9, d5, d0[0]
        vmlal.s16       q9, d6, d0[1]
        vmlal.s16       q9, d7, d0[2]
        vmlal.s16       q9, d9, d0[3]
        vmlal.s16       q9, d10, d1[0]
        vmlal.s16       q9, d11, d1[1]
        vmlal.s16       q9, d13, d1[2]
        vmlal.s16       q9, d14, d1[3]
        vmlal.s16       q9, d15, d2[0]

        /* Drop the low 8 bits of every sum and narrow to 16 bits */
        vshrn.i32       d16, q8, #8
        vshrn.i32       d17, q9, #8

        /* Saturate to unsigned 8 bits and store both pixels */
        vqmovun.s16     d16, q8
        vst1.8          d16, [r0]!

        /* Are we done yet? */
        subs            r4, r4, #1
        bne             1b

        /* We're done, bye! */
        vpop            {q4-q7}
        pop             {r4-r8, r10, r11, lr}
        bx              lr
END(rsdIntrinsicConvolve3x3_K)
106ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
 * rsdIntrinsicColorMatrix4x4_K
 *
 * Multiplies 4-byte pixels by a 4x4 matrix of signed 16-bit coefficients,
 * four pixels (16 bytes) per loop iteration.
 *
 *      r0 = dst
 *      r1 = src
 *      r2 = matrix  (16 signed 16-bit values, m[0..15])
 *      r3 = length  (loop iteration count)
 *
 * With R, G, B, A being bytes 0..3 of a source pixel, byte c of the
 * destination pixel is
 *
 *      (R*m[c] + G*m[4+c] + B*m[8+c] + A*m[12+c]) >> 8
 *
 * narrowed to 16 bits and then saturated to the unsigned 8-bit range.
 *
 * The count is decremented and tested at the bottom of the loop, so it
 * must be non-zero on entry.
 *
 * Only r0-r3, q0-q3 and q8-q15 are written and nothing is read from the
 * stack, so no registers need to be saved or restored.
 */
ENTRY(rsdIntrinsicColorMatrix4x4_K)
        /* q2 = m[0..7] (d4, d5), q3 = m[8..15] (d6, d7) */
        vld1.16         {q2}, [r2]!
        vld1.16         {q3}, [r2]!

1:
        /* De-interleave four pixels: d0 = R, d1 = G, d2 = B, d3 = A
           (lanes 0..3 of each register) */
        vld4.8          {d0[0],d1[0],d2[0],d3[0]}, [r1]!
        vld4.8          {d0[1],d1[1],d2[1],d3[1]}, [r1]!
        vld4.8          {d0[2],d1[2],d2[2],d3[2]}, [r1]!
        vld4.8          {d0[3],d1[3],d2[3],d3[3]}, [r1]!

        /* Widen to 16 bits; only the low halves d24/d26/d28/d30 are used */
        vmovl.u8        q12, d0  /* R */
        vmovl.u8        q13, d1  /* G */
        vmovl.u8        q14, d2  /* B */
        vmovl.u8        q15, d3  /* A */

        /* q8..q11 accumulate output bytes 0..3 as 32-bit sums */
        vmull.s16       q8,  d24, d4[0]
        vmull.s16       q9,  d24, d4[1]
        vmull.s16       q10, d24, d4[2]
        vmull.s16       q11, d24, d4[3]

        vmlal.s16       q8,  d26, d5[0]
        vmlal.s16       q9,  d26, d5[1]
        vmlal.s16       q10, d26, d5[2]
        vmlal.s16       q11, d26, d5[3]

        vmlal.s16       q8,  d28, d6[0]
        vmlal.s16       q9,  d28, d6[1]
        vmlal.s16       q10, d28, d6[2]
        vmlal.s16       q11, d28, d6[3]

        vmlal.s16       q8,  d30, d7[0]
        vmlal.s16       q9,  d30, d7[1]
        vmlal.s16       q10, d30, d7[2]
        vmlal.s16       q11, d30, d7[3]

        /* Drop the low 8 bits of every sum and narrow to 16 bits */
        vshrn.i32       d24, q8, #8
        vshrn.i32       d26, q9, #8
        vshrn.i32       d28, q10, #8
        vshrn.i32       d30, q11, #8

        /* Saturate to unsigned 8 bits; lanes 0..3 hold the results */
        vqmovun.s16     d0, q12
        vqmovun.s16     d1, q13
        vqmovun.s16     d2, q14
        vqmovun.s16     d3, q15

        /* Re-interleave and store the four pixels */
        vst4.8          {d0[0],d1[0],d2[0],d3[0]}, [r0]!
        vst4.8          {d0[1],d1[1],d2[1],d3[1]}, [r0]!
        vst4.8          {d0[2],d1[2],d2[2],d3[2]}, [r0]!
        vst4.8          {d0[3],d1[3],d2[3],d3[3]}, [r0]!

        subs            r3, r3, #1
        bne             1b

        bx              lr
END(rsdIntrinsicColorMatrix4x4_K)
174ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
 * rsdIntrinsicColorMatrix3x3_K
 *
 * Multiplies the first three bytes of every 4-byte pixel by a 3x3 matrix
 * of signed 16-bit coefficients, four pixels (16 bytes) per loop
 * iteration.  Byte 3 of every pixel is copied through unmodified.
 *
 *      r0 = dst
 *      r1 = src
 *      r2 = matrix  (16 signed 16-bit values are loaded, m[0..15]; the
 *                    rows are 4 values apart)
 *      r3 = length  (loop iteration count)
 *
 * With b0, b1, b2 being bytes 0..2 of a source pixel, byte c (c = 0..2)
 * of the destination pixel is
 *
 *      (b0*m[c] + b1*m[4+c] + b2*m[8+c]) >> 8
 *
 * narrowed to 16 bits and then saturated to the unsigned 8-bit range.
 *
 * The count is decremented and tested at the bottom of the loop, so it
 * must be non-zero on entry.
 *
 * Only r0-r3, q0-q3, q8-q10 and q12-q14 are written and nothing is read
 * from the stack, so no registers need to be saved or restored.
 */
ENTRY(rsdIntrinsicColorMatrix3x3_K)
        /* q2 = m[0..7] (d4, d5), q3 = m[8..15] (d6, d7) */
        vld1.16         {q2}, [r2]!
        vld1.16         {q3}, [r2]!

1:
        /* De-interleave four pixels into lanes 0..3 of d0..d3 */
        vld4.8          {d0[0],d1[0],d2[0],d3[0]}, [r1]!
        vld4.8          {d0[1],d1[1],d2[1],d3[1]}, [r1]!
        vld4.8          {d0[2],d1[2],d2[2],d3[2]}, [r1]!
        vld4.8          {d0[3],d1[3],d2[3],d3[3]}, [r1]!

        /* Widen bytes 0..2 to 16 bits; d3 (byte 3) is left as loaded */
        vmovl.u8        q12, d0
        vmovl.u8        q13, d1
        vmovl.u8        q14, d2

        /* q8..q10 accumulate output bytes 0..2 as 32-bit sums */
        vmull.s16       q8,  d24, d4[0]
        vmull.s16       q9,  d24, d4[1]
        vmull.s16       q10, d24, d4[2]

        vmlal.s16       q8,  d26, d5[0]
        vmlal.s16       q9,  d26, d5[1]
        vmlal.s16       q10, d26, d5[2]

        vmlal.s16       q8,  d28, d6[0]
        vmlal.s16       q9,  d28, d6[1]
        vmlal.s16       q10, d28, d6[2]

        /* Drop the low 8 bits of every sum and narrow to 16 bits */
        vshrn.i32       d24, q8, #8
        vshrn.i32       d26, q9, #8
        vshrn.i32       d28, q10, #8

        /* Saturate to unsigned 8 bits; lanes 0..3 hold the results */
        vqmovun.s16     d0, q12
        vqmovun.s16     d1, q13
        vqmovun.s16     d2, q14

        /* Re-interleave and store; d3 still holds the source byte 3 */
        vst4.8          {d0[0],d1[0],d2[0],d3[0]}, [r0]!
        vst4.8          {d0[1],d1[1],d2[1],d3[1]}, [r0]!
        vst4.8          {d0[2],d1[2],d2[2],d3[2]}, [r0]!
        vst4.8          {d0[3],d1[3],d2[3],d3[3]}, [r0]!

        subs            r3, r3, #1
        bne             1b

        bx              lr
END(rsdIntrinsicColorMatrix3x3_K)
231ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
 * rsdIntrinsicColorMatrixDot_K
 *
 * Computes one weighted sum of the first three bytes of every 4-byte
 * pixel and writes it to bytes 0, 1 and 2 of the destination pixel, four
 * pixels (16 bytes) per loop iteration.  Byte 3 of every pixel is copied
 * through unmodified.
 *
 *      r0 = dst
 *      r1 = src
 *      r2 = matrix  (16 signed 16-bit values are loaded, m[0..15]; only
 *                    m[0], m[4] and m[8] are used)
 *      r3 = length  (loop iteration count)
 *
 * With b0, b1, b2 being bytes 0..2 of a source pixel, the value written is
 *
 *      (b0*m[0] + b1*m[4] + b2*m[8]) >> 8
 *
 * narrowed to 16 bits and then saturated to the unsigned 8-bit range.
 *
 * The count is decremented and tested at the bottom of the loop, so it
 * must be non-zero on entry.
 *
 * Only r0-r3, q0-q3, q8 and q12-q14 are written and nothing is read from
 * the stack, so no registers need to be saved or restored.
 */
ENTRY(rsdIntrinsicColorMatrixDot_K)
        /* q2 = m[0..7] (d4, d5), q3 = m[8..15] (d6, d7) */
        vld1.16         {q2}, [r2]!
        vld1.16         {q3}, [r2]!

1:
        /* De-interleave four pixels into lanes 0..3 of d0..d3 */
        vld4.8          {d0[0],d1[0],d2[0],d3[0]}, [r1]!
        vld4.8          {d0[1],d1[1],d2[1],d3[1]}, [r1]!
        vld4.8          {d0[2],d1[2],d2[2],d3[2]}, [r1]!
        vld4.8          {d0[3],d1[3],d2[3],d3[3]}, [r1]!

        /* Widen bytes 0..2 to 16 bits; d3 (byte 3) is left as loaded */
        vmovl.u8        q12, d0
        vmovl.u8        q13, d1
        vmovl.u8        q14, d2

        /* q8 = b0*m[0] + b1*m[4] + b2*m[8] as 32-bit sums */
        vmull.s16       q8,  d24, d4[0]
        vmlal.s16       q8,  d26, d5[0]
        vmlal.s16       q8,  d28, d6[0]

        /* Drop the low 8 bits, narrow to 16 bits, saturate to unsigned 8 */
        vshrn.i32       d24, q8, #8
        vqmovun.s16     d0, q12

        /* Replicate the result into bytes 1 and 2 */
        vmov            d1, d0
        vmov            d2, d0

        /* Re-interleave and store; d3 still holds the source byte 3 */
        vst4.8          {d0[0],d1[0],d2[0],d3[0]}, [r0]!
        vst4.8          {d0[1],d1[1],d2[1],d3[1]}, [r0]!
        vst4.8          {d0[2],d1[2],d2[2],d3[2]}, [r0]!
        vst4.8          {d0[3],d1[3],d2[3],d3[3]}, [r0]!

        subs            r3, r3, #1
        bne             1b

        bx              lr
END(rsdIntrinsicColorMatrixDot_K)
276ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
277ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
 * rsdIntrinsicBlurVF_K
 *
 * Vertical blur pass.  Reads 4-byte pixels, writes four 32-bit floats per
 * pixel, and handles two adjacent pixels per outer-loop iteration.
 *
 *      r0       = out
 *      r1       = pin
 *      r2       = stride  (added to the source pointer after every tap)
 *      r3       = gptr    (one 32-bit float weight per tap)
 *      [sp]     = ct      (tap count, kept in r4)
 *      [sp, #4] = x1      (kept in r5)
 *      [sp, #8] = x2      (kept in r6)
 *
 * Both loops test for equality at the bottom: ct must be non-zero, and x1
 * has to reach x2 exactly in steps of 2, otherwise the loops do not stop
 * where intended.
 */
ENTRY(rsdIntrinsicBlurVF_K)
        push            {r4-r8, r10, r11, lr}
        vpush           {q4-q7}

        /* 32 bytes of core registers and 64 bytes of NEON registers were
           pushed, so the stack arguments start at sp + 32+64. */
        ldr             r4, [sp, #32+64]        /* ct */
        ldr             r5, [sp, #32+64+4]      /* x1 */
        ldr             r6, [sp, #32+64+8]      /* x2 */

1:
        /* Clear the accumulators of the two pixels */
        vmov.i32        q10, #0
        vmov.i32        q11, #0

        add             r7, r1, r5, lsl #2      /* pi = pin + x1 * 4 */
        mov             r10, r3                 /* weight cursor */
        mov             r11, r4                 /* taps left */

2:
        /* Fetch two pixels (8 bytes), then step to the next row */
        vld1.32         {d2}, [r7], r2

        /* 8 bit -> 16 bit -> 32 bit -> float */
        vmovl.u8        q1, d2
        vmovl.u16       q3, d2
        vmovl.u16       q4, d3
        vcvt.f32.s32    q3, q3
        vcvt.f32.s32    q4, q4

        /* Accumulate both pixels scaled by this tap's weight */
        vld1.32         {d0[0]}, [r10]!
        vmla.f32        q10, q3, d0[0]
        vmla.f32        q11, q4, d0[0]

        subs            r11, r11, #1
        bne             2b

        /* Write both float4 results (32 bytes) */
        vst1.32         {q10, q11}, [r0]!

        add             r5, r5, #2
        cmp             r5, r6
        bne             1b

        vpop            {q4-q7}
        pop             {r4-r8, r10, r11, lr}
        bx              lr
END(rsdIntrinsicBlurVF_K)
331ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
 * rsdIntrinsicBlurHF_K
 *
 * Horizontal blur pass.  Reads pixels stored as four 32-bit floats
 * (16 bytes each) and writes one 4-byte pixel per outer-loop iteration.
 *
 *      r0       = out
 *      r1       = pin
 *      r2       = gptr  (one 32-bit float weight per tap)
 *      r3       = ct    (tap count)
 *      [sp]     = x1    (kept in r4)
 *      [sp, #4] = x2    (kept in r5)
 *
 * The first tap is handled on its own and the remaining ct - 1 taps are
 * consumed two at a time with an equality test at the bottom of the loop,
 * so ct - 1 has to be a non-zero multiple of 2.  Likewise x1 has to reach
 * x2 exactly in steps of 1.
 */
ENTRY(rsdIntrinsicBlurHF_K)
        push            {r4-r8, r10, r11, lr}
        vpush           {q4-q7}

        /* 32 bytes of core registers and 64 bytes of NEON registers were
           pushed, so the stack arguments start at sp + 32+64. */
        ldr             r4, [sp, #32+64]        /* x1 */
        ldr             r5, [sp, #32+64+4]      /* x2 */

1:
        add             r7, r1, r4, lsl #4      /* pi = pin + x1 * 16 */
        mov             r10, r2                 /* weight cursor */

        /* First tap initialises the accumulator q0 */
        vld1.32         {q1}, [r7]!
        vld1.32         {d6[0]}, [r10]!
        vmul.f32        q0, q1, d6[0]
        sub             r11, r3, #1             /* taps left */

2:
        /* Two more pixels and their two weights */
        vld1.32         {q1, q2}, [r7]!
        vld1.32         {d6}, [r10]!
        vmla.f32        q0, q1, d6[0]
        vmla.f32        q0, q2, d6[1]
        subs            r11, r11, #2
        bne             2b

        /* float -> 32-bit int, then narrow 32 -> 16 -> 8 bits; the pixel
           ends up in the low 4 bytes of d0 */
        vcvt.s32.f32    q0, q0
        vmovn.u32       d0, q0
        vmovn.u16       d0, q0

        vst1.32         {d0[0]}, [r0]!

        add             r4, r4, #1
        cmp             r4, r5
        bne             1b

        vpop            {q4-q7}
        pop             {r4-r8, r10, r11, lr}
        bx              lr
END(rsdIntrinsicBlurHF_K)
383ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
384ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams/*
385ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        r0 = dst
386ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        r1 = Y
387ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        r2 = VU
388ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        r3 = length (pixels / 8)
389ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        r4 = sp, params
390ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
391ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        This function converts 8 pixels per iteration
392ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams*/
ENTRY(rsdIntrinsicYuv_K)
        /*
         * Converts 8 pixels per loop pass and writes 4 bytes per pixel.
         *
         *   r0 = output pointer (advanced by 32 bytes per pass)
         *   r1 = luma pointer (advanced by 8 bytes per pass)
         *   r2 = interleaved chroma pointer (advanced by 8 bytes per pass;
         *        each byte pair is shared by two neighbouring pixels)
         *   r3 = number of passes
         *   r4 = params pointer, read from the first stack argument
         *
         * params is read as three consecutive blocks of eight 16-bit values:
         *   q2 (d4, d5) = multipliers, q3 = luma offset, q4 = chroma offset.
         */
        push            {r4-r8, r10, r11, lr}   // 8 core registers = 32 bytes
        vpush           {q4-q7}                 // 4 quad registers  = 64 bytes

        /* The loop tests its counter only after a complete pass, so a zero
           count would wrap around; leave without touching memory instead. */
        cmp             r3, #0
        beq             2f

        ldr             r4, [sp, #32+64]        // first stack argument sits above the 96 bytes pushed
        vld1.16         {q2}, [r4]!             // mults
        vld1.16         {q3}, [r4]!             // y offset
        vld1.16         {q4}, [r4]!             // chroma offset (128 per the original note)
        vdup.8          d3, d5[1]               // byte 1 of d5 in every lane; stored as the 4th
                                                // byte of each pixel (presumably constant alpha)

1:
        vld1.8          {d10}, [r1]!            // 8 luma bytes
        vld1.8          {d12}, [r2]!            // 4 chroma byte pairs
        vmovl.u8        q5, d10                 // Y at .16: d10 = pixels 0-3, d11 = pixels 4-7
        vmovl.u8        q6, d12                 // vu at .16

        vsub.i16        q5, q5, q3
        vsub.i16        q6, q6, q4

        /* Split the pairs: d12 = first byte of each pair, d13 = second byte
           (v and u respectively for the "vu" order noted at the load).
           Lane order after this step is sample 0, 2, 1, 3. */
        vtrn.16         d12, d13
        vmov            q7, q6
        /* Transposing each half against its own copy duplicates every sample:
             d12/d13 = samples 0,0,1,1  (pixels 0-3)
             d14/d15 = samples 2,2,3,3  (pixels 4-7)
           A further vtrn.32 here would exchange the two middle samples, so
           pixels 2-3 would pick up sample 2 and pixels 4-5 sample 1. */
        vtrn.16         d12, d14
        vtrn.16         d13, d15

        /* Every channel starts from Y * d4[0]. */
        vmull.s16       q8,  d10, d4[0]
        vmull.s16       q11, d11, d4[0]
        vmov            q9,  q8
        vmov            q10, q8
        vmov            q12, q11
        vmov            q13, q11

        /* Pixels 0-3:
             channel 0 (q8)  += d12 * d4[1]
             channel 1 (q9)  += d12 * d5[0] + d13 * d4[2]
             channel 2 (q10) += d13 * d4[3]                 */
        vmlal.s16       q8,  d12, d4[1]
        vmlal.s16       q9,  d12, d5[0]
        vmlal.s16       q10, d13, d4[3]
        vmlal.s16       q9,  d13, d4[2]

        /* Pixels 4-7, same coefficients (q11, q12, q13). */
        vmlal.s16       q11, d14, d4[1]
        vmlal.s16       q12, d14, d5[0]
        vmlal.s16       q13, d15, d4[3]
        vmlal.s16       q12, d15, d4[2]

        /* Drop the 8 fractional bits and gather both halves of a channel in
           one quad register.  Each destination half is written only after the
           32-bit sums that lived there have been narrowed. */
        vshrn.i32       d16, q8,  #8
        vshrn.i32       d17, q11, #8            // q8  = channel 0, pixels 0-7
        vshrn.i32       d18, q9,  #8
        vshrn.i32       d19, q12, #8            // q9  = channel 1, pixels 0-7
        vshrn.i32       d20, q10, #8
        vshrn.i32       d21, q13, #8            // q10 = channel 2, pixels 0-7

        /* Saturate to unsigned 8 bit and store the 8 pixels interleaved. */
        vqmovun.s16     d0, q8
        vqmovun.s16     d1, q9
        vqmovun.s16     d2, q10
        vst4.8          {d0, d1, d2, d3}, [r0]!

        subs            r3, r3, #1
        bne             1b

2:
        vpop            {q4-q7}
        pop             {r4-r8, r10, r11, lr}
        bx              lr
END(rsdIntrinsicYuv_K)
465ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
466ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams/* Convolve 5x5 */
467ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
        Produces 2 output pixels (4 bytes each) per loop pass.

        Register arguments:
        r0 = dst
        r1 = y0 base pointer  (row y - 2)
        r2 = y1 base pointer  (row y - 1)
        r3 = y2 base pointer  (row y)

        Stack arguments, in order:
        y3 base pointer (row y + 1), y4 base pointer (row y + 2),
        coeffs pointer, count (number of loop passes)

        Inside the function:
        r4 = y3, r5 = y4
        r6 = coeffs pointer while the taps are loaded, afterwards the counter
        r7 = 8, the byte advance applied to every row pointer per pass

        Coefficients are 16-bit, row-major, and the sum is shifted right by 8
        before saturation.  The load fetches 28 halfwords (56 bytes) although
        only the first 25 are used, so the buffer must be at least that long.
        Each pass reads 24 bytes from every row while advancing by 8.
*/
ENTRY(rsdIntrinsicConvolve5x5_K)
        push            {r4-r7, lr}             // 5 core registers = 20 bytes
        vpush           {q4-q7}                 // 4 quad registers = 64 bytes

        /* Stack arguments now start 20 + 64 bytes above sp. */
        ldr             r4, [sp, #20 + 64]      // y3
        ldr             r5, [sp, #24 + 64]      // y4
        ldr             r6, [sp, #28 + 64]      // coeffs

        /* Taps 0-15 in d0-d3, taps 16-27 in d4-d6 (25-27 unused). */
        vld1.16         {d0, d1, d2, d3}, [r6]!
        vld1.16         {d4, d5, d6}, [r6]

        /* r6 is reused as the loop counter from here on. */
        ldr             r6, [sp, #32 + 64]

        /* The loop tests its counter only after a complete pass, so a zero
           count would wrap around; leave without touching memory instead. */
        cmp             r6, #0
        beq             2f

        /* Post-increment amount shared by all row loads. */
        mov             r7, #8

1:
        /* Rows y - 2 and y - 1: fetch 6 pixels of each, step 2 pixels on. */
        vld1.8          {d24, d25, d26}, [r1], r7
        vld1.8          {d27, d28, d29}, [r2], r7

        /* Prefetch hints at [row pointer + r7]. */
        PLD         (r1, r7)
        PLD         (r2, r7)

        /* Widen 8 -> 16 bit.  q12-q14 overlap the raw bytes in d24-d29, so
           the order matters: every source is consumed before it is
           overwritten.
             d18-d23 = row y - 2, pixels 0-5 (one pixel per D register)
             d24-d29 = row y - 1, pixels 0-5                              */
        vmovl.u8        q9,  d24
        vmovl.u8        q10, d25
        vmovl.u8        q11, d26
        vmovl.u8        q12, d27
        vmovl.u8        q13, d28
        vmovl.u8        q14, d29

        /* First output pixel (q4): taps 0-4 on row y - 2, pixels 0-4 ... */
        vmull.s16       q4, d18, d0[0]
        vmlal.s16       q4, d19, d0[1]
        vmlal.s16       q4, d20, d0[2]
        vmlal.s16       q4, d21, d0[3]
        vmlal.s16       q4, d22, d1[0]

        /* ... and taps 5-9 on row y - 1, pixels 0-4. */
        vmlal.s16       q4, d24, d1[1]
        vmlal.s16       q4, d25, d1[2]
        vmlal.s16       q4, d26, d1[3]
        vmlal.s16       q4, d27, d2[0]
        vmlal.s16       q4, d28, d2[1]

        /* Second output pixel (q5): same taps, window moved one pixel right. */
        vmull.s16       q5, d19, d0[0]
        vmlal.s16       q5, d20, d0[1]
        vmlal.s16       q5, d21, d0[2]
        vmlal.s16       q5, d22, d0[3]
        vmlal.s16       q5, d23, d1[0]

        vmlal.s16       q5, d25, d1[1]
        vmlal.s16       q5, d26, d1[2]
        vmlal.s16       q5, d27, d1[3]
        vmlal.s16       q5, d28, d2[0]
        vmlal.s16       q5, d29, d2[1]


        /* Rows y and y + 1, handled exactly like the first two rows. */
        vld1.8          {d24, d25, d26}, [r3], r7
        vld1.8          {d27, d28, d29}, [r4], r7

        PLD         (r3, r7)
        PLD         (r4, r7)

        /* d18-d23 = row y, d24-d29 = row y + 1 (pixels 0-5 each). */
        vmovl.u8        q9,  d24
        vmovl.u8        q10, d25
        vmovl.u8        q11, d26
        vmovl.u8        q12, d27
        vmovl.u8        q13, d28
        vmovl.u8        q14, d29

        /* First pixel: taps 10-14 on row y, taps 15-19 on row y + 1. */
        vmlal.s16       q4, d18, d2[2]
        vmlal.s16       q4, d19, d2[3]
        vmlal.s16       q4, d20, d3[0]
        vmlal.s16       q4, d21, d3[1]
        vmlal.s16       q4, d22, d3[2]

        vmlal.s16       q4, d24, d3[3]
        vmlal.s16       q4, d25, d4[0]
        vmlal.s16       q4, d26, d4[1]
        vmlal.s16       q4, d27, d4[2]
        vmlal.s16       q4, d28, d4[3]

        /* Second pixel. */
        vmlal.s16       q5, d19, d2[2]
        vmlal.s16       q5, d20, d2[3]
        vmlal.s16       q5, d21, d3[0]
        vmlal.s16       q5, d22, d3[1]
        vmlal.s16       q5, d23, d3[2]

        vmlal.s16       q5, d25, d3[3]
        vmlal.s16       q5, d26, d4[0]
        vmlal.s16       q5, d27, d4[1]
        vmlal.s16       q5, d28, d4[2]
        vmlal.s16       q5, d29, d4[3]

        /* Row y + 2. */
        vld1.8          {d24, d25, d26}, [r5], r7

        PLD         (r5, r7)

        /* d18-d23 = row y + 2, pixels 0-5. */
        vmovl.u8        q9,  d24
        vmovl.u8        q10, d25
        vmovl.u8        q11, d26

        /* Taps 20-24 for the first pixel ... */
        vmlal.s16       q4, d18, d5[0]
        vmlal.s16       q4, d19, d5[1]
        vmlal.s16       q4, d20, d5[2]
        vmlal.s16       q4, d21, d5[3]
        vmlal.s16       q4, d22, d6[0]

        /* ... and for the second pixel. */
        vmlal.s16       q5, d19, d5[0]
        vmlal.s16       q5, d20, d5[1]
        vmlal.s16       q5, d21, d5[2]
        vmlal.s16       q5, d22, d5[3]
        vmlal.s16       q5, d23, d6[0]

        /* Drop the 8 fractional bits, 32 -> 16 bit.  The two narrowed pixels
           end up side by side in q4 (d8 = first, d9 = second). */
        vshrn.i32       d8, q4, #8
        vshrn.i32       d9, q5, #8

        /* 16 -> 8 bit with unsigned saturation: both pixels now fit in d8. */
        vqmovun.s16     d8, q4

        /* Write the 2 pixels and advance dst by 8 bytes. */
        vst1.8          {d8}, [r0]!

        /* Another pass? */
        subs            r6, r6, #1
        bne             1b

2:
        vpop            {q4-q7}
        pop             {r4-r7, lr}
        bx              lr

END(rsdIntrinsicConvolve5x5_K)
643ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
644ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
645ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
646ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
        dst = src + dst * (1.0 - src.a)

        r0 = dst
        r1 = src
        r2 = length (loop passes; each pass blends 8 pixels of 4 bytes)

        Per channel, alpha included, the code computes
            (src * 256 + dst * (255 - src.a)) >> 8
        in 16-bit lanes with wrap-around arithmetic.
        NOTE(review): the lanes cannot wrap as long as src <= src.a
        (premultiplied input) - confirm against the callers.
*/
ENTRY(rsdIntrinsicBlendSrcOver_K)
        .save           {r4, lr}
        push            {r4, lr}
        vpush           {q4-q7}

        /* The loop tests its counter only after a complete pass, so a zero
           length would wrap around; leave without touching memory instead. */
        cmp             r2, #0
        beq             2f

        mov             r4, #255
        vdup.16         q7, r4                  // q7 = 255 in every 16-bit lane

        mov             r4, r0                  // r4 = store pointer; r0 is advanced by the dst loads
1:

        /* src: 8 pixels, de-interleaved so that d0-d3 hold one channel each */
        vld4.8          {d0, d1, d2, d3}, [r1]!
        vshll.u8        q12, d0, #8             // accumulators start at src * 256
        vshll.u8        q13, d1, #8
        vshll.u8        q14, d2, #8
        vmovl.u8        q6, d3
        vsub.i16        q6, q7, q6              // q6 = 255 - src.a
        vshll.u8        q15, d3, #8

        /* dst: same layout, widened to 16 bit */
        vld4.8          {d0, d1, d2, d3}, [r0]!
        vmovl.u8        q8, d0
        vmovl.u8        q9, d1
        vmovl.u8        q10, d2
        vmovl.u8        q11, d3

        /* accumulator += dst * (255 - src.a) */
        vmla.i16        q12, q8, q6
        vmla.i16        q13, q9, q6
        vmla.i16        q14, q10, q6
        vmla.i16        q15, q11, q6

        /* Keep the high byte of every lane and write the 8 pixels back. */
        vshrn.i16       d0, q12, #8
        vshrn.i16       d1, q13, #8
        vshrn.i16       d2, q14, #8
        vshrn.i16       d3, q15, #8
        vst4.8          {d0, d1, d2, d3}, [r4]!

        subs            r2, r2, #1
        bne             1b

2:
        vpop            {q4-q7}
        pop             {r4, lr}
        bx              lr
END(rsdIntrinsicBlendSrcOver_K)
720ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
        dst = dst + src * (1.0 - dst.a)

        r0 = dst
        r1 = src
        r2 = length (loop passes; each pass blends 8 pixels of 4 bytes)

        Per channel, alpha included, the code computes
            (dst * 256 + src * (255 - dst.a)) >> 8
        in 16-bit lanes with wrap-around arithmetic.
        NOTE(review): the lanes cannot wrap as long as dst <= dst.a
        (premultiplied data) - confirm against the callers.
*/
ENTRY(rsdIntrinsicBlendDstOver_K)
        .save           {r4, lr}
        push            {r4, lr}
        vpush           {q4-q7}

        /* The loop tests its counter only after a complete pass, so a zero
           length would wrap around; leave without touching memory instead. */
        cmp             r2, #0
        beq             2f

        mov             r4, #255
        vdup.16         q7, r4                  // q7 = 255 in every 16-bit lane

        mov             r4, r0                  // r4 = store pointer; r0 is advanced by the dst loads
1:

        /* src: 8 pixels, de-interleaved (d0-d3 = one channel each), widened */
        vld4.8          {d0, d1, d2, d3}, [r1]!
        vmovl.u8        q12, d0
        vmovl.u8        q13, d1
        vmovl.u8        q14, d2
        vmovl.u8        q15, d3

        /* dst: same layout; accumulators start at dst * 256 */
        vld4.8          {d0, d1, d2, d3}, [r0]!
        vshll.u8        q8, d0, #8
        vshll.u8        q9, d1, #8
        vshll.u8        q10, d2, #8
        vmovl.u8        q6, d3
        vsub.i16        q6, q7, q6              // q6 = 255 - dst.a
        vshll.u8        q11, d3, #8

        /* accumulator += src * (255 - dst.a) */
        vmla.i16        q8, q12, q6
        vmla.i16        q9, q13, q6
        vmla.i16        q10, q14, q6
        vmla.i16        q11, q15, q6

        /* Keep the high byte of every lane and write the 8 pixels back. */
        vshrn.i16       d0, q8, #8
        vshrn.i16       d1, q9, #8
        vshrn.i16       d2, q10, #8
        vshrn.i16       d3, q11, #8
        vst4.8          {d0, d1, d2, d3}, [r4]!

        subs            r2, r2, #1
        bne             1b

2:
        vpop            {q4-q7}
        pop             {r4, lr}
        bx              lr
END(rsdIntrinsicBlendDstOver_K)
795ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
        rsdIntrinsicBlendSrcIn_K

        dst = (src * dst.a) >> 8

        Pixels are four bytes each.  Every loop iteration reads eight
        pixels (32 bytes) from src and from dst and writes eight pixels
        back over dst.  All four src channels, alpha included, are scaled
        by the alpha byte (channel 3) that dst held before the blend.

        r0 = dst    (read and rewritten in place)
        r1 = src
        r2 = length, counted in 8-pixel groups; 0 returns immediately

        Besides r4 only q0-q1 and q11-q15 are touched.  Those NEON
        registers are caller-saved under the AAPCS, so nothing has to be
        spilled with vpush/vpop.
*/
ENTRY(rsdIntrinsicBlendSrcIn_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}

        cmp             r2, #0
        beq             2f              // empty run: the subs/bne loop would wrap

        mov             r4, r0          // r0 walks the dst loads, r4 the dst stores
1:

        /* src: de-interleave 8 pixels, d0..d3 = channel 0..3 */
        vld4.8          {d0, d1, d2, d3}, [r1]!
        vmovl.u8        q12, d0
        vmovl.u8        q13, d1
        vmovl.u8        q14, d2
        vmovl.u8        q15, d3

        /* dst: only the alpha plane (d3) is used */
        vld4.8          {d0, d1, d2, d3}, [r0]!
        vmovl.u8        q11, d3         // q11 = dst.a widened to 16 bits

        vmul.i16        q12, q12, q11
        vmul.i16        q13, q13, q11
        vmul.i16        q14, q14, q11
        vmul.i16        q15, q15, q11

        /* keep bits 15..8 of each product, i.e. product >> 8 */
        vshrn.i16       d0, q12, #8
        vshrn.i16       d1, q13, #8
        vshrn.i16       d2, q14, #8
        vshrn.i16       d3, q15, #8
        vst4.8          {d0, d1, d2, d3}, [r4]!

        subs            r2, r2, #1
        bne             1b

2:
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicBlendSrcIn_K)
864ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
        rsdIntrinsicBlendDstIn_K

        dst = (dst * src.a) >> 8

        Pixels are four bytes each.  Every loop iteration reads eight
        pixels (32 bytes) from src and from dst and writes eight pixels
        back over dst.  All four dst channels, alpha included, are scaled
        by the src alpha byte (channel 3).

        r0 = dst    (read and rewritten in place)
        r1 = src
        r2 = length, counted in 8-pixel groups; 0 returns immediately

        Besides r4 only q0-q1, q8-q11 and q15 are touched.  Those NEON
        registers are caller-saved under the AAPCS, so nothing has to be
        spilled with vpush/vpop.
*/
ENTRY(rsdIntrinsicBlendDstIn_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}

        cmp             r2, #0
        beq             2f              // empty run: the subs/bne loop would wrap

        mov             r4, r0          // r0 walks the dst loads, r4 the dst stores
1:

        /* src: only the alpha plane (d3) is used */
        vld4.8          {d0, d1, d2, d3}, [r1]!
        vmovl.u8        q15, d3         // q15 = src.a widened to 16 bits

        /* dst: de-interleave 8 pixels, d0..d3 = channel 0..3 */
        vld4.8          {d0, d1, d2, d3}, [r0]!
        vmovl.u8        q8, d0
        vmovl.u8        q9, d1
        vmovl.u8        q10, d2
        vmovl.u8        q11, d3

        vmul.i16        q8, q8, q15
        vmul.i16        q9, q9, q15
        vmul.i16        q10, q10, q15
        vmul.i16        q11, q11, q15

        /* keep bits 15..8 of each product, i.e. product >> 8 */
        vshrn.i16       d0, q8, #8
        vshrn.i16       d1, q9, #8
        vshrn.i16       d2, q10, #8
        vshrn.i16       d3, q11, #8
        vst4.8          {d0, d1, d2, d3}, [r4]!

        subs            r2, r2, #1
        bne             1b

2:
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicBlendDstIn_K)
933ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
934ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
935ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
        rsdIntrinsicBlendSrcOut_K

        dst = (src * (255 - dst.a)) >> 8

        This is the 8-bit form of dst = src * (1.0 - dst.a).  Pixels are
        four bytes each.  Every loop iteration reads eight pixels
        (32 bytes) from src and from dst and writes eight pixels back
        over dst.  All four src channels, alpha included, are scaled by
        the inverse of the alpha byte (channel 3) dst held before the
        blend.

        r0 = dst    (read and rewritten in place)
        r1 = src
        r2 = length, counted in 8-pixel groups; 0 returns immediately

        The temporaries live in q2/q3 rather than q6/q7.  q0-q3 and
        q8-q15 are caller-saved under the AAPCS, so no VFP registers have
        to be spilled with vpush/vpop.
*/
ENTRY(rsdIntrinsicBlendSrcOut_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}

        cmp             r2, #0
        beq             2f              // empty run: the subs/bne loop would wrap

        mov             r4, #255
        vdup.16         q3, r4          // q3 = 255 in every 16-bit lane

        mov             r4, r0          // r0 walks the dst loads, r4 the dst stores
1:

        /* src: de-interleave 8 pixels, d0..d3 = channel 0..3 */
        vld4.8          {d0, d1, d2, d3}, [r1]!
        vmovl.u8        q12, d0
        vmovl.u8        q13, d1
        vmovl.u8        q14, d2
        vmovl.u8        q15, d3

        /* dst: only the alpha plane (d3) is used */
        vld4.8          {d0, d1, d2, d3}, [r0]!
        vmovl.u8        q11, d3         // q11 = dst.a widened to 16 bits

        vsub.i16        q2, q3, q11     // q2 = 255 - dst.a
        vmul.i16        q12, q12, q2
        vmul.i16        q13, q13, q2
        vmul.i16        q14, q14, q2
        vmul.i16        q15, q15, q2

        /* keep bits 15..8 of each product, i.e. product >> 8 */
        vshrn.i16       d0, q12, #8
        vshrn.i16       d1, q13, #8
        vshrn.i16       d2, q14, #8
        vshrn.i16       d3, q15, #8
        vst4.8          {d0, d1, d2, d3}, [r4]!

        subs            r2, r2, #1
        bne             1b

2:
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicBlendSrcOut_K)
1009ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1010ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
        rsdIntrinsicBlendDstOut_K

        dst = (dst * (255 - src.a)) >> 8

        This is the 8-bit form of dst = dst * (1.0 - src.a).  Pixels are
        four bytes each.  Every loop iteration reads eight pixels
        (32 bytes) from src and from dst and writes eight pixels back
        over dst.  All four dst channels, alpha included, are scaled by
        the inverse of the src alpha byte (channel 3).

        r0 = dst    (read and rewritten in place)
        r1 = src
        r2 = length, counted in 8-pixel groups; 0 returns immediately

        The temporaries live in q2/q3 rather than q6/q7.  q0-q3 and
        q8-q15 are caller-saved under the AAPCS, so no VFP registers have
        to be spilled with vpush/vpop.
*/
ENTRY(rsdIntrinsicBlendDstOut_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}

        cmp             r2, #0
        beq             2f              // empty run: the subs/bne loop would wrap

        mov             r4, #255
        vdup.16         q3, r4          // q3 = 255 in every 16-bit lane

        mov             r4, r0          // r0 walks the dst loads, r4 the dst stores
1:

        /* src: only the alpha plane (d3) is used */
        vld4.8          {d0, d1, d2, d3}, [r1]!
        vmovl.u8        q15, d3         // q15 = src.a widened to 16 bits

        /* dst: de-interleave 8 pixels, d0..d3 = channel 0..3 */
        vld4.8          {d0, d1, d2, d3}, [r0]!
        vmovl.u8        q8, d0
        vmovl.u8        q9, d1
        vmovl.u8        q10, d2
        vmovl.u8        q11, d3

        vsub.i16        q2, q3, q15     // q2 = 255 - src.a
        vmul.i16        q12, q8, q2
        vmul.i16        q13, q9, q2
        vmul.i16        q14, q10, q2
        vmul.i16        q15, q11, q2    // src.a is dead by now, q15 is reused

        /* keep bits 15..8 of each product, i.e. product >> 8 */
        vshrn.i16       d0, q12, #8
        vshrn.i16       d1, q13, #8
        vshrn.i16       d2, q14, #8
        vshrn.i16       d3, q15, #8
        vst4.8          {d0, d1, d2, d3}, [r4]!

        subs            r2, r2, #1
        bne             1b

2:
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicBlendDstOut_K)
1084ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1085ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
        rsdIntrinsicBlendSrcAtop_K

        dst.rgb = (src.rgb * dst.a + (255 - src.a) * dst.rgb) >> 8
        dst.a   = dst.a

        This is the 8-bit form of
        dst.rgb = src.rgb * dst.a + (1.0 - src.a) * dst.rgb.  Pixels are
        four bytes each.  Every loop iteration reads eight pixels
        (32 bytes) from src and from dst and writes eight pixels back
        over dst.

        r0 = dst    (read and rewritten in place)
        r1 = src
        r2 = length, counted in 8-pixel groups; 0 returns immediately

        The sum is accumulated in 16-bit lanes and wraps modulo 65536 if
        it exceeds 65535.
        NOTE(review): presumably callers pass premultiplied-alpha pixels,
        which keeps the sum in range - confirm against the caller.

        The temporaries live in q2/q3 rather than q6/q7.  q0-q3 and
        q8-q15 are caller-saved under the AAPCS, so no VFP registers have
        to be spilled with vpush/vpop.
*/
ENTRY(rsdIntrinsicBlendSrcAtop_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}

        cmp             r2, #0
        beq             2f              // empty run: the subs/bne loop would wrap

        mov             r4, #255
        vdup.16         q3, r4          // q3 = 255 in every 16-bit lane

        mov             r4, r0          // r0 walks the dst loads, r4 the dst stores
1:

        /* src: de-interleave 8 pixels, d0..d3 = channel 0..3 */
        vld4.8          {d0, d1, d2, d3}, [r1]!
        vmovl.u8        q12, d0
        vmovl.u8        q13, d1
        vmovl.u8        q14, d2
        vmovl.u8        q15, d3

        /* dst: de-interleave 8 pixels; d3 keeps the dst alpha bytes */
        vld4.8          {d0, d1, d2, d3}, [r0]!
        vmovl.u8        q8, d0
        vmovl.u8        q9, d1
        vmovl.u8        q10, d2
        vmovl.u8        q11, d3

        vsub.i16        q2, q3, q15     // q2 = 255 - src.a
        vmul.i16        q8, q8, q2      // dst.rgb * (255 - src.a)
        vmul.i16        q9, q9, q2
        vmul.i16        q10, q10, q2

        vmla.i16        q8, q12, q11    // += src.rgb * dst.a
        vmla.i16        q9, q13, q11
        vmla.i16        q10, q14, q11

        /* keep bits 15..8 of each sum, i.e. sum >> 8 */
        vshrn.i16       d0, q8, #8
        vshrn.i16       d1, q9, #8
        vshrn.i16       d2, q10, #8
        /* d3 is left as loaded, so the stored alpha is the original dst.a */
        vst4.8          {d0, d1, d2, d3}, [r4]!

        subs            r2, r2, #1
        bne             1b

2:
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicBlendSrcAtop_K)
1164ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1165ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams/*
1166ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        dst = dst.rgb * src.a + (1.0 - dst.a) * src.rgb
1167ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        dst.a = src.a
1168ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1169ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        r0 = dst
1170ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        r1 = src
1171ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        r2 = length
1172ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams*/
1173ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason SamsENTRY(rsdIntrinsicBlendDstAtop_K)
1174ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        .save           {r4, lr}
1175ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        stmfd           sp!, {r4, lr}
1176ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vpush           {q4-q7}
1177ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1178ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        mov r4, #255
1179ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vdup.16 q7, r4
1180ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1181ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        mov r4, r0
1182ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams1:
1183ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1184ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        /* src */
1185ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r1]!
1186ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r1]!
1187ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r1]!
1188ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r1]!
1189ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r1]!
1190ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r1]!
1191ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r1]!
1192ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r1]!
1193ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vmovl.u8 q12, d0
1194ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vmovl.u8 q13, d1
1195ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vmovl.u8 q14, d2
1196ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vmovl.u8 q15, d3
1197ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1198ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        /* dst */
1199ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [r0]!
1200ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[1],d1[1],d2[1],d3[1]}, [r0]!
1201ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[2],d1[2],d2[2],d3[2]}, [r0]!
1202ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[3],d1[3],d2[3],d3[3]}, [r0]!
1203ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[4],d1[4],d2[4],d3[4]}, [r0]!
1204ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[5],d1[5],d2[5],d3[5]}, [r0]!
1205ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[6],d1[6],d2[6],d3[6]}, [r0]!
1206ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vld4.8 {d0[7],d1[7],d2[7],d3[7]}, [r0]!
1207ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vmovl.u8 q8, d0
1208ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vmovl.u8 q9, d1
1209ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vmovl.u8 q10, d2
1210ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vmovl.u8 q11, d3
1211ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1212ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1213ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vsub.i16 q6, q7, q11        // q6 = 1 - dst.a
1214ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vmul.i16 q12, q12, q6
1215ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vmul.i16 q13, q13, q6
1216ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vmul.i16 q14, q14, q6
1217ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1218ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vmla.i16 q12, q8, q15
1219ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vmla.i16 q13, q9, q15
1220ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vmla.i16 q14, q10, q15
1221ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1222ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1223ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vshrn.i16 d0, q12, #8
1224ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vshrn.i16 d1, q13, #8
1225ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vshrn.i16 d2, q14, #8
1226ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        //vshrn.i16 d3, q15, #8
1227ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [r4]!
1228ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vst4.8 {d0[1],d1[1],d2[1],d3[1]}, [r4]!
1229ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vst4.8 {d0[2],d1[2],d2[2],d3[2]}, [r4]!
1230ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vst4.8 {d0[3],d1[3],d2[3],d3[3]}, [r4]!
1231ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vst4.8 {d0[4],d1[4],d2[4],d3[4]}, [r4]!
1232ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vst4.8 {d0[5],d1[5],d2[5],d3[5]}, [r4]!
1233ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vst4.8 {d0[6],d1[6],d2[6],d3[6]}, [r4]!
1234ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vst4.8 {d0[7],d1[7],d2[7],d3[7]}, [r4]!
1235ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1236ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        subs r2, r2, #1
1237ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        bne 1b
1238ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1239ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        vpop            {q4-q7}
1240ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        ldmfd           sp!, {r4, lr}
1241ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams        bx              lr
1242ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason SamsEND(rsdIntrinsicBlendDstAtop_K)
1243ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
        dst = dst ^ src

        Bitwise XOR of the source buffer into the destination buffer,
        32 bytes (eight 4-byte pixels) per loop iteration.

        r0 = dst, read and overwritten in place
        r1 = src
        r2 = number of 32-byte (8-pixel) groups to process; 0 is a no-op

        Clobbers r0-r2, q0-q3 and the condition flags.  No stack is used
        and no callee-saved register is touched.
*/
ENTRY(rsdIntrinsicBlendXor_K)
        cmp             r2, #0
        beq             2f                      // empty range: never enter the do-while loop
1:
        /* XOR is the same for every byte, so no channel de-interleave is needed. */
        vld1.8          {d4-d7}, [r1]!          // q2-q3 = next 32 src bytes
        vld1.8          {d0-d3}, [r0]           // q0-q1 = matching 32 dst bytes

        veor            q0, q0, q2
        veor            q1, q1, q3

        vst1.8          {d0-d3}, [r0]!          // write result back, advance dst

        subs            r2, r2, #1
        bne             1b
2:
        bx              lr
END(rsdIntrinsicBlendXor_K)
1307ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
        dst = dst * src

        Per byte (all four channels, alpha included):
                dst = (dst * src) >> 8
        32 bytes (eight 4-byte pixels) are handled per loop iteration.

        r0 = dst, read and overwritten in place
        r1 = src
        r2 = number of 32-byte (8-pixel) groups to process; 0 is a no-op

        Clobbers r0-r2, q0-q3, q8-q11 and the condition flags.  No stack is
        used and no callee-saved register is touched.
*/
ENTRY(rsdIntrinsicBlendMultiply_K)
        cmp             r2, #0
        beq             2f                      // empty range: never enter the do-while loop
1:
        /* Every channel gets the same treatment, so plain linear loads suffice. */
        vld1.8          {d4-d7}, [r1]!          // q2-q3 = next 32 src bytes
        vld1.8          {d0-d3}, [r0]           // q0-q1 = matching 32 dst bytes

        /* Widening multiply: u8 * u8 -> u16, at most 255*255 so it cannot overflow. */
        vmull.u8        q8,  d0, d4
        vmull.u8        q9,  d1, d5
        vmull.u8        q10, d2, d6
        vmull.u8        q11, d3, d7

        /* Keep the high byte of each product (truncating >> 8). */
        vshrn.i16       d0, q8,  #8
        vshrn.i16       d1, q9,  #8
        vshrn.i16       d2, q10, #8
        vshrn.i16       d3, q11, #8

        vst1.8          {d0-d3}, [r0]!          // write result back, advance dst

        subs            r2, r2, #1
        bne             1b
2:
        bx              lr
END(rsdIntrinsicBlendMultiply_K)
1380ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
        dst = min(src + dst, 1.0)

        Per byte (all four channels, alpha included):
                dst = min(dst + src, 255)
        32 bytes (eight 4-byte pixels) are handled per loop iteration.

        r0 = dst, read and overwritten in place
        r1 = src
        r2 = number of 32-byte (8-pixel) groups to process; 0 is a no-op

        Clobbers r0-r2, q0-q3 and the condition flags.  No stack is used
        and no callee-saved register is touched.
*/
ENTRY(rsdIntrinsicBlendAdd_K)
        cmp             r2, #0
        beq             2f                      // empty range: never enter the do-while loop
1:
        vld1.8          {d4-d7}, [r1]!          // q2-q3 = next 32 src bytes
        vld1.8          {d0-d3}, [r0]           // q0-q1 = matching 32 dst bytes

        /* Unsigned saturating add clamps each byte sum at 255. */
        vqadd.u8        q0, q0, q2
        vqadd.u8        q1, q1, q3

        vst1.8          {d0-d3}, [r0]!          // write result back, advance dst

        subs            r2, r2, #1
        bne             1b
2:
        bx              lr
END(rsdIntrinsicBlendAdd_K)
1453ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1454ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
/*
        dst = max(dst - src, 0.0)

        Per byte (all four channels, alpha included):
                dst = max(dst - src, 0)
        32 bytes (eight 4-byte pixels) are handled per loop iteration.

        r0 = dst, read and overwritten in place
        r1 = src
        r2 = number of 32-byte (8-pixel) groups to process; 0 is a no-op

        Clobbers r0-r2, q0-q3 and the condition flags.  No stack is used
        and no callee-saved register is touched.
*/
ENTRY(rsdIntrinsicBlendSub_K)
        cmp             r2, #0
        beq             2f                      // empty range: never enter the do-while loop
1:
        vld1.8          {d4-d7}, [r1]!          // q2-q3 = next 32 src bytes
        vld1.8          {d0-d3}, [r0]           // q0-q1 = matching 32 dst bytes

        /* Unsigned saturating subtract (dst - src) clamps each byte at 0. */
        vqsub.u8        q0, q0, q2
        vqsub.u8        q1, q1, q3

        vst1.8          {d0-d3}, [r0]!          // write result back, advance dst

        subs            r2, r2, #1
        bne             1b
2:
        bx              lr
END(rsdIntrinsicBlendSub_K)
1527ca29b8caf56fa4866752f9cea4ec02b2a271dceeJason Sams
1528