rsCpuIntrinsics_neon_Convolve.S revision e1e08b4c9cc80c51224fdaf3aeab0804daf073e6
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */



#include <machine/cpu-features.h>
#include <machine/asm.h>

/*
 * rsdIntrinsicConvolve3x3_K
 *
 * 3x3 convolution over three source rows of 4-byte pixels.  Every loop
 * iteration reads 16 bytes (4 pixels) from each row, produces 2 output
 * pixels (8 bytes) and then advances all three row pointers by 8 bytes.
 *
 *      r0        = dst
 *      r1        = y0 base pointer (first source row)
 *      r2        = y1 base pointer (second source row)
 *      r3        = y2 base pointer (third source row)
 *      [sp]      = coeffs: pointer to signed 16-bit coefficients.  Sixteen
 *                  halfwords are loaded; the first nine (d0[0]..d2[0]) are
 *                  used, three per source row.
 *      [sp, #4]  = loop count ("length / 2"): number of 2-pixel iterations.
 *                  The loop is bottom-tested (subs/bne), so the count must
 *                  be non-zero.
 *
 * Each channel is widened to 16 bits, multiplied and accumulated in 32 bits,
 * shifted right by 8 and finally saturated to an unsigned byte.
 */

ENTRY(rsdIntrinsicConvolve3x3_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}

        /* Stack arguments sit above the 8 bytes of r4/lr and the 64 bytes of q4-q7. */
        ldr             r4, [sp, #8+64]         /* r4 = coeffs */
        vld1.16         {q0}, [r4]!
        vld1.16         {q1}, [r4]
        ldr             r4, [sp, #12+64]        /* r4 = loop count */

1:
        vld1.8          {q13}, [r1]
        vld1.8          {q14}, [r2]
        vld1.8          {q15}, [r3]
        add             r1, r1, #8
        add             r2, r2, #8
        add             r3, r3, #8
        PLD             (r1, #8)
        PLD             (r2, #8)
        PLD             (r3, #8)

        /* Widen the twelve source pixels from u8 to 16-bit lanes. */
        vmovl.u8        q2, d26
        vmovl.u8        q3, d27
        vmovl.u8        q4, d28
        vmovl.u8        q5, d29
        vmovl.u8        q6, d30
        vmovl.u8        q7, d31

/*
        The two pixel source array is
        d4,  d5,  d6,  d7
        d8,  d9,  d10, d11
        d12, d13, d14, d15

        q8 accumulates output pixel 0 (source columns 0..2),
        q9 accumulates output pixel 1 (source columns 1..3).
*/

        /* Row y0 */
        vmull.s16       q8, d4, d0[0]
        vmull.s16       q9, d5, d0[0]

        vmlal.s16       q8, d5, d0[1]
        vmlal.s16       q9, d6, d0[1]

        vmlal.s16       q8, d6, d0[2]
        vmlal.s16       q9, d7, d0[2]

        /* Row y1.  The first tap of pixel 0 reads d8 (row y1, column 0);
           it previously read d4, which belongs to row y0. */
        vmlal.s16       q8, d8, d0[3]
        vmlal.s16       q9, d9, d0[3]

        vmlal.s16       q8, d9, d1[0]
        vmlal.s16       q9, d10, d1[0]

        vmlal.s16       q8, d10, d1[1]
        vmlal.s16       q9, d11, d1[1]

        /* Row y2 */
        vmlal.s16       q8, d12, d1[2]
        vmlal.s16       q9, d13, d1[2]

        vmlal.s16       q8, d13, d1[3]
        vmlal.s16       q9, d14, d1[3]

        vmlal.s16       q8, d14, d2[0]
        vmlal.s16       q9, d15, d2[0]

        /* Drop the 8 fractional bits and narrow 32 -> 16 bits. */
        vshrn.i32       d16, q8, #8
        vshrn.i32       d17, q9, #8

        /* Saturate to u8 and store the two output pixels. */
        vqmovun.s16     d16, q8
        vst1.8          d16, [r0]!

        subs            r4, r4, #1
        bne             1b


        vpop            {q4-q7}
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicConvolve3x3_K)


/*
 * rsdIntrinsicColorMatrix4x4_K
 *
 * Multiplies every 4-byte pixel by a 4x4 matrix of signed 16-bit
 * coefficients.  With in[j] the j-th byte of a pixel and m[] the sixteen
 * halfwords at r2:
 *
 *      out[k] = saturate_u8((in[0]*m[k] + in[1]*m[4+k] +
 *                            in[2]*m[8+k] + in[3]*m[12+k]) >> 8)
 *
 *      r0 = dst
 *      r1 = src
 *      r2 = matrx (16 halfwords; loaded once into q2/q3)
 *      r3 = length: loop count.  Each iteration handles 4 pixels
 *           (16 bytes); the loop is bottom-tested, so it must be non-zero.
 */
ENTRY(rsdIntrinsicColorMatrix4x4_K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}

        vld1.16         {q2}, [r2]!
        vld1.16         {q3}, [r2]!

1:
        /* De-interleave 4 pixels: lanes 0..3 of d0/d1/d2/d3 receive
           channel 0/1/2/3 of pixels 0..3. */
        vld4.8          {d0[0],d1[0],d2[0],d3[0]}, [r1]!
        vld4.8          {d0[1],d1[1],d2[1],d3[1]}, [r1]!
        vld4.8          {d0[2],d1[2],d2[2],d3[2]}, [r1]!
        vld4.8          {d0[3],d1[3],d2[3],d3[3]}, [r1]!

        /* Widen to 16 bits; only the low halves (d24, d26, d28, d30)
           hold the pixels loaded above. */
        vmovl.u8        q12, d0
        vmovl.u8        q13, d1
        vmovl.u8        q14, d2
        vmovl.u8        q15, d3

        /* q8..q11 accumulate output channels 0..3.  Each input channel is
           paired with its own coefficient register; previously every
           accumulator re-used a single input channel for all four terms. */
        vmull.s16       q8,  d24, d4[0]
        vmull.s16       q9,  d24, d4[1]
        vmull.s16       q10, d24, d4[2]
        vmull.s16       q11, d24, d4[3]

        vmlal.s16       q8,  d26, d5[0]
        vmlal.s16       q9,  d26, d5[1]
        vmlal.s16       q10, d26, d5[2]
        vmlal.s16       q11, d26, d5[3]

        vmlal.s16       q8,  d28, d6[0]
        vmlal.s16       q9,  d28, d6[1]
        vmlal.s16       q10, d28, d6[2]
        vmlal.s16       q11, d28, d6[3]

        vmlal.s16       q8,  d30, d7[0]
        vmlal.s16       q9,  d30, d7[1]
        vmlal.s16       q10, d30, d7[2]
        vmlal.s16       q11, d30, d7[3]

        /* Drop the 8 fractional bits and narrow 32 -> 16 bits. */
        vshrn.i32       d24, q8, #8
        vshrn.i32       d26, q9, #8
        vshrn.i32       d28, q10, #8
        vshrn.i32       d30, q11, #8

        /* Saturate to u8; only lanes 0..3 of each result are stored. */
        vqmovun.s16     d0, q12
        vqmovun.s16     d1, q13
        vqmovun.s16     d2, q14
        vqmovun.s16     d3, q15

        vst4.8          {d0[0],d1[0],d2[0],d3[0]}, [r0]!
        vst4.8          {d0[1],d1[1],d2[1],d3[1]}, [r0]!
        vst4.8          {d0[2],d1[2],d2[2],d3[2]}, [r0]!
        vst4.8          {d0[3],d1[3],d2[3],d3[3]}, [r0]!

        subs            r3, r3, #1
        bne             1b

        vpop            {q4-q7}
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicColorMatrix4x4_K)

/*
 * rsdIntrinsicColorMatrix3x3K
 *
 * Same as the 4x4 routine but only the first three channels are
 * transformed; the fourth byte of every pixel is copied through unchanged.
 * Sixteen halfwords are still loaded from r2 and rows keep a stride of
 * four halfwords:
 *
 *      out[k] = saturate_u8((in[0]*m[k] + in[1]*m[4+k] + in[2]*m[8+k]) >> 8)
 *               for k = 0..2
 *      out[3] = in[3]
 *
 *      r0 = dst
 *      r1 = src
 *      r2 = matrx
 *      r3 = length: loop count.  Each iteration handles 4 pixels
 *           (16 bytes); the loop is bottom-tested, so it must be non-zero.
 */
ENTRY(rsdIntrinsicColorMatrix3x3K)
        .save           {r4, lr}
        stmfd           sp!, {r4, lr}
        vpush           {q4-q7}

        vld1.16         {q2}, [r2]!
        vld1.16         {q3}, [r2]!

1:
        /* De-interleave 4 pixels; d3 keeps the fourth channel, which is
           stored back untouched at the end of the iteration. */
        vld4.8          {d0[0],d1[0],d2[0],d3[0]}, [r1]!
        vld4.8          {d0[1],d1[1],d2[1],d3[1]}, [r1]!
        vld4.8          {d0[2],d1[2],d2[2],d3[2]}, [r1]!
        vld4.8          {d0[3],d1[3],d2[3],d3[3]}, [r1]!

        vmovl.u8        q12, d0
        vmovl.u8        q13, d1
        vmovl.u8        q14, d2

        /* q8..q10 accumulate output channels 0..2.  Each input channel is
           paired with its own coefficient register; previously every
           accumulator re-used a single input channel for all three terms. */
        vmull.s16       q8,  d24, d4[0]
        vmull.s16       q9,  d24, d4[1]
        vmull.s16       q10, d24, d4[2]

        vmlal.s16       q8,  d26, d5[0]
        vmlal.s16       q9,  d26, d5[1]
        vmlal.s16       q10, d26, d5[2]

        vmlal.s16       q8,  d28, d6[0]
        vmlal.s16       q9,  d28, d6[1]
        vmlal.s16       q10, d28, d6[2]

        /* Drop the 8 fractional bits and narrow 32 -> 16 bits. */
        vshrn.i32       d24, q8, #8
        vshrn.i32       d26, q9, #8
        vshrn.i32       d28, q10, #8

        /* Saturate to u8; only lanes 0..3 of each result are stored. */
        vqmovun.s16     d0, q12
        vqmovun.s16     d1, q13
        vqmovun.s16     d2, q14

        vst4.8          {d0[0],d1[0],d2[0],d3[0]}, [r0]!
        vst4.8          {d0[1],d1[1],d2[1],d3[1]}, [r0]!
        vst4.8          {d0[2],d1[2],d2[2],d3[2]}, [r0]!
        vst4.8          {d0[3],d1[3],d2[3],d3[3]}, [r0]!

        subs            r3, r3, #1
        bne             1b

        vpop            {q4-q7}
        ldmfd           sp!, {r4, lr}
        bx              lr
END(rsdIntrinsicColorMatrix3x3K)
