rsCpuIntrinsics_neon_Convolve.S revision e1e08b4c9cc80c51224fdaf3aeab0804daf073e6
1e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams/*
2e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams * Copyright (C) 2012 The Android Open Source Project
3e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams *
4e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams * Licensed under the Apache License, Version 2.0 (the "License");
5e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams * you may not use this file except in compliance with the License.
6e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams * You may obtain a copy of the License at
7e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams *
8e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams *      http://www.apache.org/licenses/LICENSE-2.0
9e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams *
10e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams * Unless required by applicable law or agreed to in writing, software
11e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams * distributed under the License is distributed on an "AS IS" BASIS,
12e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams * See the License for the specific language governing permissions and
14e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams * limitations under the License.
15e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams */
16e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams
17e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams
18e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams
19e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams#include <machine/cpu-features.h>
20e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams#include <machine/asm.h>
21e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams
/*
 * rsdIntrinsicConvolve3x3_K -- 3x3 convolution over three source rows
 * (ARM32, NEON).
 *
 * Arguments on entry:
 *      r0       = dst; 8 bytes are stored per loop iteration
 *      r1       = y0 base pointer (first source row)
 *      r2       = y1 base pointer (second source row)
 *      r3       = y2 base pointer (third source row)
 *      [sp, #0] = coeffs; nine signed 16-bit taps c0..c8 are used, but
 *                 32 bytes (q0 and q1) are loaded from this address
 *      [sp, #4] = loop count (length / 2). Must be non-zero: the counter
 *                 is only tested after it has been decremented.
 *
 * Every iteration loads 16 bytes from each row but advances the row
 * pointers by only 8, so the rows must be readable 8 bytes beyond the
 * last pair of pixels consumed.
 *
 * r4, lr and q4-q7 are saved and restored. r0-r3, q0-q3, q8, q9,
 * q13-q15 and the flags are modified.
 */

ENTRY(rsdIntrinsicConvolve3x3_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}

        /* The stack arguments now sit above 8 bytes of r4/lr plus
           64 bytes of q4-q7. */
        ldr             r4, [sp, #8+64]         /* r4 = coeffs */
        vld1.16         {q0}, [r4]!             /* d0 = c0..c3, d1 = c4..c7 */
        vld1.16         {q1}, [r4]              /* d2[0] = c8 */
        ldr             r4, [sp, #12+64]        /* r4 = loop count */

1:
        vld1.8          {q13}, [r1]
        vld1.8          {q14}, [r2]
        vld1.8          {q15}, [r3]
        add             r1, r1, #8
        add             r2, r2, #8
        add             r3, r3, #8
        PLD         (r1, #8)
        PLD         (r2, #8)
        PLD         (r3, #8)

        /* Widen the source bytes to 16 bits so they can feed vmull/vmlal. */
        vmovl.u8        q2, d26
        vmovl.u8        q3, d27
        vmovl.u8        q4, d28
        vmovl.u8        q5, d29
        vmovl.u8        q6, d30
        vmovl.u8        q7, d31

/*
        Each d register below holds four consecutive source bytes widened
        to 16 bits. The source window for the two output pixels is
        d4,  d5,  d6,  d7       (row y0)
        d8,  d9,  d10, d11      (row y1)
        d12, d13, d14, d15      (row y2)
        q8 accumulates columns 0-2, q9 accumulates columns 1-3.
*/

        /* Row y0, taps c0..c2 */
        vmull.s16       q8, d4, d0[0]
        vmull.s16       q9, d5, d0[0]

        vmlal.s16       q8, d5, d0[1]
        vmlal.s16       q9, d6, d0[1]

        vmlal.s16       q8, d6, d0[2]
        vmlal.s16       q9, d7, d0[2]

        /* Row y1, taps c3..c5. The first column of this row is d8. */
        vmlal.s16       q8, d8, d0[3]
        vmlal.s16       q9, d9, d0[3]

        vmlal.s16       q8, d9, d1[0]
        vmlal.s16       q9, d10, d1[0]

        vmlal.s16       q8, d10, d1[1]
        vmlal.s16       q9, d11, d1[1]

        /* Row y2, taps c6..c8 */
        vmlal.s16       q8, d12, d1[2]
        vmlal.s16       q9, d13, d1[2]

        vmlal.s16       q8, d13, d1[3]
        vmlal.s16       q9, d14, d1[3]

        vmlal.s16       q8, d14, d2[0]
        vmlal.s16       q9, d15, d2[0]

        /* Drop the low 8 bits of each sum and narrow to 16 bits; the two
           halves are packed back into q8 (d16/d17). */
        vshrn.i32       d16, q8, #8
        vshrn.i32       d17, q9, #8

        /* Saturate to unsigned 8 bits and store the 8 result bytes. */
        vqmovun.s16     d16, q8
        vst1.8          d16, [r0]!

        subs            r4, r4, #1
        bne             1b


        vpop            {q4-q7}
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicConvolve3x3_K)
107e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams
108e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams
/*
 * rsdIntrinsicColorMatrix4x4_K -- multiply 4-channel, 8-bit-per-channel
 * pixels by a 4x4 matrix of signed 16-bit coefficients (ARM32, NEON).
 *
 *      r0 = dst
 *      r1 = src
 *      r2 = matrix; 16 halfwords loaded into d4-d7. dN holds the four
 *           coefficients applied to input channel N-4, and lane K of
 *           each selects output channel K:
 *               out[K] = (in0*d4[K] + in1*d5[K] + in2*d6[K] + in3*d7[K]) >> 8
 *           saturated to unsigned 8 bits.
 *      r3 = length, used as the loop count; each iteration handles four
 *           pixels (16 bytes). Must be non-zero: the counter is only
 *           tested after it has been decremented.
 *
 * r4, lr and q4-q7 are saved and restored although the body does not
 * touch them. r0-r3, q0-q3, q8-q15 and the flags are modified.
 */
ENTRY(rsdIntrinsicColorMatrix4x4_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}

        vld1.16         {q2}, [r2]!             /* d4, d5 */
        vld1.16         {q3}, [r2]!             /* d6, d7 */

1:
        /* De-interleave four pixels: channel N of pixel P lands in dN[P]. */
        vld4.8          {d0[0],d1[0],d2[0],d3[0]}, [r1]!
        vld4.8          {d0[1],d1[1],d2[1],d3[1]}, [r1]!
        vld4.8          {d0[2],d1[2],d2[2],d3[2]}, [r1]!
        vld4.8          {d0[3],d1[3],d2[3],d3[3]}, [r1]!

        /* Widen to 16 bits; only the low halves (d24, d26, d28, d30)
           carry the four loaded pixels. */
        vmovl.u8        q12, d0
        vmovl.u8        q13, d1
        vmovl.u8        q14, d2
        vmovl.u8        q15, d3

        /* q8..q11 = output channels 0..3. Start with input channel 0. */
        vmull.s16       q8,  d24, d4[0]
        vmull.s16       q9,  d24, d4[1]
        vmull.s16       q10, d24, d4[2]
        vmull.s16       q11, d24, d4[3]

        /* Add the contribution of input channel 1. */
        vmlal.s16       q8,  d26, d5[0]
        vmlal.s16       q9,  d26, d5[1]
        vmlal.s16       q10, d26, d5[2]
        vmlal.s16       q11, d26, d5[3]

        /* Add the contribution of input channel 2. */
        vmlal.s16       q8,  d28, d6[0]
        vmlal.s16       q9,  d28, d6[1]
        vmlal.s16       q10, d28, d6[2]
        vmlal.s16       q11, d28, d6[3]

        /* Add the contribution of input channel 3. */
        vmlal.s16       q8,  d30, d7[0]
        vmlal.s16       q9,  d30, d7[1]
        vmlal.s16       q10, d30, d7[2]
        vmlal.s16       q11, d30, d7[3]

        /* Drop the low 8 bits of each sum and narrow to 16 bits. */
        vshrn.i32       d24, q8, #8
        vshrn.i32       d26, q9, #8
        vshrn.i32       d28, q10, #8
        vshrn.i32       d30, q11, #8

        /* Saturate to unsigned 8 bits; lanes 0-3 hold the results. */
        vqmovun.s16     d0, q12
        vqmovun.s16     d1, q13
        vqmovun.s16     d2, q14
        vqmovun.s16     d3, q15

        /* Re-interleave and store the four pixels. */
        vst4.8          {d0[0],d1[0],d2[0],d3[0]}, [r0]!
        vst4.8          {d0[1],d1[1],d2[1],d3[1]}, [r0]!
        vst4.8          {d0[2],d1[2],d2[2],d3[2]}, [r0]!
        vst4.8          {d0[3],d1[3],d2[3],d3[3]}, [r0]!

        subs            r3, r3, #1
        bne             1b

        vpop            {q4-q7}
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicColorMatrix4x4_K)
176e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams
/*
 * rsdIntrinsicColorMatrix3x3K -- multiply the first three channels of
 * 4-channel, 8-bit-per-channel pixels by a 3x3 matrix of signed 16-bit
 * coefficients; the fourth channel is copied through unchanged
 * (ARM32, NEON).
 *
 *      r0 = dst
 *      r1 = src
 *      r2 = matrix; 16 halfwords are loaded into d4-d7, of which only
 *           lanes 0-2 of d4, d5 and d6 are used. dN holds the
 *           coefficients applied to input channel N-4, and lane K of
 *           each selects output channel K:
 *               out[K] = (in0*d4[K] + in1*d5[K] + in2*d6[K]) >> 8
 *           saturated to unsigned 8 bits.
 *      r3 = length, used as the loop count; each iteration handles four
 *           pixels (16 bytes). Must be non-zero: the counter is only
 *           tested after it has been decremented.
 *
 * r4, lr and q4-q7 are saved and restored although the body does not
 * touch them. r0-r3, q0-q3, q8-q10, q12-q14 and the flags are modified.
 */
ENTRY(rsdIntrinsicColorMatrix3x3K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}

        vld1.16         {q2}, [r2]!             /* d4, d5 */
        vld1.16         {q3}, [r2]!             /* d6, d7 (d7 is unused) */

1:
        /* De-interleave four pixels: channel N of pixel P lands in dN[P]. */
        vld4.8          {d0[0],d1[0],d2[0],d3[0]}, [r1]!
        vld4.8          {d0[1],d1[1],d2[1],d3[1]}, [r1]!
        vld4.8          {d0[2],d1[2],d2[2],d3[2]}, [r1]!
        vld4.8          {d0[3],d1[3],d2[3],d3[3]}, [r1]!

        /* Widen channels 0-2 to 16 bits; d3 is left as loaded so the
           fourth channel is stored back untouched. */
        vmovl.u8        q12, d0
        vmovl.u8        q13, d1
        vmovl.u8        q14, d2

        /* q8..q10 = output channels 0..2. Start with input channel 0. */
        vmull.s16       q8,  d24, d4[0]
        vmull.s16       q9,  d24, d4[1]
        vmull.s16       q10, d24, d4[2]

        /* Add the contribution of input channel 1. */
        vmlal.s16       q8,  d26, d5[0]
        vmlal.s16       q9,  d26, d5[1]
        vmlal.s16       q10, d26, d5[2]

        /* Add the contribution of input channel 2. */
        vmlal.s16       q8,  d28, d6[0]
        vmlal.s16       q9,  d28, d6[1]
        vmlal.s16       q10, d28, d6[2]

        /* Drop the low 8 bits of each sum and narrow to 16 bits. */
        vshrn.i32       d24, q8, #8
        vshrn.i32       d26, q9, #8
        vshrn.i32       d28, q10, #8

        /* Saturate to unsigned 8 bits; lanes 0-3 hold the results. */
        vqmovun.s16     d0, q12
        vqmovun.s16     d1, q13
        vqmovun.s16     d2, q14

        /* Re-interleave and store the four pixels. */
        vst4.8          {d0[0],d1[0],d2[0],d3[0]}, [r0]!
        vst4.8          {d0[1],d1[1],d2[1],d3[1]}, [r0]!
        vst4.8          {d0[2],d1[2],d2[2],d3[2]}, [r0]!
        vst4.8          {d0[3],d1[3],d2[3],d3[3]}, [r0]!

        subs            r3, r3, #1
        bne             1b

        vpop            {q4-q7}
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicColorMatrix3x3K)
233e1e08b4c9cc80c51224fdaf3aeab0804daf073e6Jason Sams
234