;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS. All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_sixtap_predict8x4_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; Six-tap filter table. Each row is 8 words (32 bytes): six taps followed
; by two zero pad words, so a row is addressed as table + offset * 32.
; Taps 1 and 4 are never positive and taps 0, 2, 3 and 5 are never
; negative; the code below therefore takes the absolute value of every
; tap and hard-codes the sign by using vmlsl (taps 1, 4) or vmull/vmlal
; (taps 0, 2, 3, 5).
;-----------------------------------------------------------------------
filter8_coeff
    DCD     0,  0,  128,    0,   0,  0,   0,  0
    DCD     0, -6,  123,   12,  -1,  0,   0,  0
    DCD     2, -11, 108,   36,  -8,  1,   0,  0
    DCD     0, -9,   93,   50,  -6,  0,   0,  0
    DCD     3, -16,  77,   77, -16,  3,   0,  0
    DCD     0, -6,   50,   93,  -9,  0,   0,  0
    DCD     1, -8,   36,  108, -11,  2,   0,  0
    DCD     0, -1,   12,  123,  -6,   0,  0,  0

;-----------------------------------------------------------------------
; vp8_sixtap_predict8x4_neon
;
; Produces an 8-pixel-wide, 4-line-high block by applying the six-tap
; filter horizontally (selected by xoffset) and/or vertically (selected
; by yoffset).
;
; In:
; r0            unsigned char  *src_ptr,
; r1            int  src_pixels_per_line,
; r2            int  xoffset,       (row index into filter8_coeff)
; r3            int  yoffset,       (row index into filter8_coeff)
; stack(r4)     unsigned char *dst_ptr,
; stack(r5)     int  dst_pitch
;
; Paths:
;   xoffset == 0             -> secondpass_filter8x4_only (vertical only)
;   xoffset != 0, yoffset==0 -> firstpass_filter8x4_only (horizontal only)
;   otherwise                -> horizontal pass over 9 lines, then vertical
;
; Clobbers: r0, r2-r3, r12, lr, q0-q3, q8-q15, flags.
; Preserves: r4-r5 and d8-d15 (q4-q7), which are saved on entry and
;            restored on every exit path.
; Stack:    12 bytes (r4, r5, lr) + 64 bytes (d8-d15), plus 32 bytes of
;           scratch on the two-pass path.
;-----------------------------------------------------------------------
|vp8_sixtap_predict8x4_neon| PROC
    push            {r4-r5, lr}
    vpush           {d8-d15}                ;q4-q7 are used below and are callee-saved

    adr             r12, filter8_coeff
    ;stack arguments sit above the 12 + 64 bytes just pushed
    ldr             r4, [sp, #76]           ;load dst_ptr from stack
    ldr             r5, [sp, #80]           ;load dst_pitch from stack

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             secondpass_filter8x4_only

    add             r2, r12, r2, lsl #5     ;calculate filter location

    cmp             r3, #0                  ;skip second_pass filter if yoffset=0

    vld1.s32        {q14, q15}, [r2]        ;load first_pass filter

    beq             firstpass_filter8x4_only

    sub             sp, sp, #32             ;reserve space on stack for temporary storage
    vabs.s32        q12, q14
    vabs.s32        q13, q15

    sub             r0, r0, #2              ;move srcptr back to (line-2) and (column-2)
    mov             lr, sp
    sub             r0, r0, r1, lsl #1

    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
    vdup.8          d1, d24[4]
    vdup.8          d2, d25[0]

;First pass: output_height lines x output_width columns (9x8)
    vld1.u8         {q3}, [r0], r1          ;load src data
    vdup.8          d3, d25[4]
    vld1.u8         {q4}, [r0], r1
    vdup.8          d4, d26[0]
    vld1.u8         {q5}, [r0], r1
    vdup.8          d5, d26[4]
    vld1.u8         {q6}, [r0], r1

    pld             [r0]
    pld             [r0, r1]
    pld             [r0, r1, lsl #1]

    vmull.u8        q7, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
    vmull.u8        q8, d8, d0
    vmull.u8        q9, d10, d0
    vmull.u8        q10, d12, d0

    vext.8          d28, d6, d7, #1         ;construct src_ptr[-1]
    vext.8          d29, d8, d9, #1
    vext.8          d30, d10, d11, #1
    vext.8          d31, d12, d13, #1

    vmlsl.u8        q7, d28, d1             ;-(src_ptr[-1] * vp8_filter[1])
    vmlsl.u8        q8, d29, d1
    vmlsl.u8        q9, d30, d1
    vmlsl.u8        q10, d31, d1

    vext.8          d28, d6, d7, #4         ;construct src_ptr[2]
    vext.8          d29, d8, d9, #4
    vext.8          d30, d10, d11, #4
    vext.8          d31, d12, d13, #4

    vmlsl.u8        q7, d28, d4             ;-(src_ptr[2] * vp8_filter[4])
    vmlsl.u8        q8, d29, d4
    vmlsl.u8        q9, d30, d4
    vmlsl.u8        q10, d31, d4

    vext.8          d28, d6, d7, #2         ;construct src_ptr[0]
    vext.8          d29, d8, d9, #2
    vext.8          d30, d10, d11, #2
    vext.8          d31, d12, d13, #2

    vmlal.u8        q7, d28, d2             ;(src_ptr[0] * vp8_filter[2])
    vmlal.u8        q8, d29, d2
    vmlal.u8        q9, d30, d2
    vmlal.u8        q10, d31, d2

    vext.8          d28, d6, d7, #5         ;construct src_ptr[3]
    vext.8          d29, d8, d9, #5
    vext.8          d30, d10, d11, #5
    vext.8          d31, d12, d13, #5

    vmlal.u8        q7, d28, d5             ;(src_ptr[3] * vp8_filter[5])
    vmlal.u8        q8, d29, d5
    vmlal.u8        q9, d30, d5
    vmlal.u8        q10, d31, d5

    vext.8          d28, d6, d7, #3         ;construct src_ptr[1]
    vext.8          d29, d8, d9, #3
    vext.8          d30, d10, d11, #3
    vext.8          d31, d12, d13, #3

    ;tap 3 is accumulated separately and merged with a saturating add
    vmull.u8        q3, d28, d3             ;(src_ptr[1] * vp8_filter[3])
    vmull.u8        q4, d29, d3
    vmull.u8        q5, d30, d3
    vmull.u8        q6, d31, d3

    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
    vqadd.s16       q8, q4
    vqadd.s16       q9, q5
    vqadd.s16       q10, q6

    vld1.u8         {q3}, [r0], r1          ;load src data

    vqrshrun.s16    d22, q7, #7             ;shift/round/saturate to u8
    vqrshrun.s16    d23, q8, #7
    vqrshrun.s16    d24, q9, #7
    vqrshrun.s16    d25, q10, #7

    ;q11/q12 are reused as accumulators below, so the first four filtered
    ;lines are parked in the 32-byte stack scratch area
    vld1.u8         {q4}, [r0], r1
    vst1.u8         {d22}, [lr]!            ;store result
    vld1.u8         {q5}, [r0], r1
    vst1.u8         {d23}, [lr]!
    vld1.u8         {q6}, [r0], r1
    vst1.u8         {d24}, [lr]!
    vld1.u8         {q7}, [r0], r1
    vst1.u8         {d25}, [lr]!

    ;first_pass filtering on the rest 5-line data
    vmull.u8        q8, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
    vmull.u8        q9, d8, d0
    vmull.u8        q10, d10, d0
    vmull.u8        q11, d12, d0
    vmull.u8        q12, d14, d0

    vext.8          d27, d6, d7, #1         ;construct src_ptr[-1]
    vext.8          d28, d8, d9, #1
    vext.8          d29, d10, d11, #1
    vext.8          d30, d12, d13, #1
    vext.8          d31, d14, d15, #1

    vmlsl.u8        q8, d27, d1             ;-(src_ptr[-1] * vp8_filter[1])
    vmlsl.u8        q9, d28, d1
    vmlsl.u8        q10, d29, d1
    vmlsl.u8        q11, d30, d1
    vmlsl.u8        q12, d31, d1

    vext.8          d27, d6, d7, #4         ;construct src_ptr[2]
    vext.8          d28, d8, d9, #4
    vext.8          d29, d10, d11, #4
    vext.8          d30, d12, d13, #4
    vext.8          d31, d14, d15, #4

    vmlsl.u8        q8, d27, d4             ;-(src_ptr[2] * vp8_filter[4])
    vmlsl.u8        q9, d28, d4
    vmlsl.u8        q10, d29, d4
    vmlsl.u8        q11, d30, d4
    vmlsl.u8        q12, d31, d4

    vext.8          d27, d6, d7, #2         ;construct src_ptr[0]
    vext.8          d28, d8, d9, #2
    vext.8          d29, d10, d11, #2
    vext.8          d30, d12, d13, #2
    vext.8          d31, d14, d15, #2

    vmlal.u8        q8, d27, d2             ;(src_ptr[0] * vp8_filter[2])
    vmlal.u8        q9, d28, d2
    vmlal.u8        q10, d29, d2
    vmlal.u8        q11, d30, d2
    vmlal.u8        q12, d31, d2

    vext.8          d27, d6, d7, #5         ;construct src_ptr[3]
    vext.8          d28, d8, d9, #5
    vext.8          d29, d10, d11, #5
    vext.8          d30, d12, d13, #5
    vext.8          d31, d14, d15, #5

    vmlal.u8        q8, d27, d5             ;(src_ptr[3] * vp8_filter[5])
    vmlal.u8        q9, d28, d5
    vmlal.u8        q10, d29, d5
    vmlal.u8        q11, d30, d5
    vmlal.u8        q12, d31, d5

    vext.8          d27, d6, d7, #3         ;construct src_ptr[1]
    vext.8          d28, d8, d9, #3
    vext.8          d29, d10, d11, #3
    vext.8          d30, d12, d13, #3
    vext.8          d31, d14, d15, #3

    vmull.u8        q3, d27, d3             ;(src_ptr[1] * vp8_filter[3])
    vmull.u8        q4, d28, d3
    vmull.u8        q5, d29, d3
    vmull.u8        q6, d30, d3
    vmull.u8        q7, d31, d3

    vqadd.s16       q8, q3                  ;sum of all (src_data*filter_parameters)
    vqadd.s16       q9, q4
    vqadd.s16       q10, q5
    vqadd.s16       q11, q6
    vqadd.s16       q12, q7

    vqrshrun.s16    d26, q8, #7             ;shift/round/saturate to u8
    vqrshrun.s16    d27, q9, #7
    vqrshrun.s16    d28, q10, #7
    vqrshrun.s16    d29, q11, #7
    vqrshrun.s16    d30, q12, #7

;Second pass: 8x4
;secondpass_filter
    ;d22-d25 = first-pass lines 0-3 (reloaded from the stack scratch),
    ;d26-d30 = first-pass lines 4-8 (still in registers)
    add             r3, r12, r3, lsl #5
    sub             lr, lr, #32             ;rewind to the start of the scratch area

    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter
    vld1.u8         {q11}, [lr]!            ;load intermediate data from stack

    vabs.s32        q7, q5
    vabs.s32        q8, q6

    vld1.u8         {q12}, [lr]!

    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
    vdup.8          d1, d14[4]
    vdup.8          d2, d15[0]
    vdup.8          d3, d15[4]
    vdup.8          d4, d16[0]
    vdup.8          d5, d16[4]

    vmull.u8        q3, d22, d0             ;(src_ptr[-2] * vp8_filter[0])
    vmull.u8        q4, d23, d0
    vmull.u8        q5, d24, d0
    vmull.u8        q6, d25, d0

    vmlsl.u8        q3, d23, d1             ;-(src_ptr[-1] * vp8_filter[1])
    vmlsl.u8        q4, d24, d1
    vmlsl.u8        q5, d25, d1
    vmlsl.u8        q6, d26, d1

    vmlsl.u8        q3, d26, d4             ;-(src_ptr[2] * vp8_filter[4])
    vmlsl.u8        q4, d27, d4
    vmlsl.u8        q5, d28, d4
    vmlsl.u8        q6, d29, d4

    vmlal.u8        q3, d24, d2             ;(src_ptr[0] * vp8_filter[2])
    vmlal.u8        q4, d25, d2
    vmlal.u8        q5, d26, d2
    vmlal.u8        q6, d27, d2

    vmlal.u8        q3, d27, d5             ;(src_ptr[3] * vp8_filter[5])
    vmlal.u8        q4, d28, d5
    vmlal.u8        q5, d29, d5
    vmlal.u8        q6, d30, d5

    vmull.u8        q7, d25, d3             ;(src_ptr[1] * vp8_filter[3])
    vmull.u8        q8, d26, d3
    vmull.u8        q9, d27, d3
    vmull.u8        q10, d28, d3

    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
    vqadd.s16       q8, q4
    vqadd.s16       q9, q5
    vqadd.s16       q10, q6

    vqrshrun.s16    d6, q7, #7              ;shift/round/saturate to u8
    vqrshrun.s16    d7, q8, #7
    vqrshrun.s16    d8, q9, #7
    vqrshrun.s16    d9, q10, #7

    vst1.u8         {d6}, [r4], r5          ;store result
    vst1.u8         {d7}, [r4], r5
    vst1.u8         {d8}, [r4], r5
    vst1.u8         {d9}, [r4], r5

    add             sp, sp, #32             ;release the scratch area
    vpop            {d8-d15}                ;restore callee-saved NEON registers
    pop             {r4-r5,pc}

;--------------------
;Horizontal filtering only (yoffset == 0): results go straight to dst.
firstpass_filter8x4_only
    vabs.s32        q12, q14
    vabs.s32        q13, q15

    sub             r0, r0, #2              ;move srcptr back to (column-2)
    vld1.u8         {q3}, [r0], r1          ;load src data

    vdup.8          d0, d24[0]              ;first_pass filter (d0-d5)
    vld1.u8         {q4}, [r0], r1
    vdup.8          d1, d24[4]
    vld1.u8         {q5}, [r0], r1
    vdup.8          d2, d25[0]
    vld1.u8         {q6}, [r0], r1
    vdup.8          d3, d25[4]
    vdup.8          d4, d26[0]
    vdup.8          d5, d26[4]

;First pass: output_height lines x output_width columns (4x8)
    pld             [r0]
    pld             [r0, r1]
    pld             [r0, r1, lsl #1]

    vmull.u8        q7, d6, d0              ;(src_ptr[-2] * vp8_filter[0])
    vmull.u8        q8, d8, d0
    vmull.u8        q9, d10, d0
    vmull.u8        q10, d12, d0

    vext.8          d28, d6, d7, #1         ;construct src_ptr[-1]
    vext.8          d29, d8, d9, #1
    vext.8          d30, d10, d11, #1
    vext.8          d31, d12, d13, #1

    vmlsl.u8        q7, d28, d1             ;-(src_ptr[-1] * vp8_filter[1])
    vmlsl.u8        q8, d29, d1
    vmlsl.u8        q9, d30, d1
    vmlsl.u8        q10, d31, d1

    vext.8          d28, d6, d7, #4         ;construct src_ptr[2]
    vext.8          d29, d8, d9, #4
    vext.8          d30, d10, d11, #4
    vext.8          d31, d12, d13, #4

    vmlsl.u8        q7, d28, d4             ;-(src_ptr[2] * vp8_filter[4])
    vmlsl.u8        q8, d29, d4
    vmlsl.u8        q9, d30, d4
    vmlsl.u8        q10, d31, d4

    vext.8          d28, d6, d7, #2         ;construct src_ptr[0]
    vext.8          d29, d8, d9, #2
    vext.8          d30, d10, d11, #2
    vext.8          d31, d12, d13, #2

    vmlal.u8        q7, d28, d2             ;(src_ptr[0] * vp8_filter[2])
    vmlal.u8        q8, d29, d2
    vmlal.u8        q9, d30, d2
    vmlal.u8        q10, d31, d2

    vext.8          d28, d6, d7, #5         ;construct src_ptr[3]
    vext.8          d29, d8, d9, #5
    vext.8          d30, d10, d11, #5
    vext.8          d31, d12, d13, #5

    vmlal.u8        q7, d28, d5             ;(src_ptr[3] * vp8_filter[5])
    vmlal.u8        q8, d29, d5
    vmlal.u8        q9, d30, d5
    vmlal.u8        q10, d31, d5

    vext.8          d28, d6, d7, #3         ;construct src_ptr[1]
    vext.8          d29, d8, d9, #3
    vext.8          d30, d10, d11, #3
    vext.8          d31, d12, d13, #3

    vmull.u8        q3, d28, d3             ;(src_ptr[1] * vp8_filter[3])
    vmull.u8        q4, d29, d3
    vmull.u8        q5, d30, d3
    vmull.u8        q6, d31, d3

    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
    vqadd.s16       q8, q4
    vqadd.s16       q9, q5
    vqadd.s16       q10, q6

    vqrshrun.s16    d22, q7, #7             ;shift/round/saturate to u8
    vqrshrun.s16    d23, q8, #7
    vqrshrun.s16    d24, q9, #7
    vqrshrun.s16    d25, q10, #7

    vst1.u8         {d22}, [r4], r5         ;store result
    vst1.u8         {d23}, [r4], r5
    vst1.u8         {d24}, [r4], r5
    vst1.u8         {d25}, [r4], r5

    vpop            {d8-d15}                ;restore callee-saved NEON registers
    pop             {r4-r5,pc}

;---------------------
;Vertical filtering only (xoffset == 0): nine source lines are read directly.
secondpass_filter8x4_only
;Second pass: 8x4
    add             r3, r12, r3, lsl #5
    sub             r0, r0, r1, lsl #1      ;move srcptr back to (line-2)
    vld1.s32        {q5, q6}, [r3]          ;load second_pass filter
    vabs.s32        q7, q5
    vabs.s32        q8, q6

    vld1.u8         {d22}, [r0], r1
    vld1.u8         {d23}, [r0], r1
    vld1.u8         {d24}, [r0], r1
    vdup.8          d0, d14[0]              ;second_pass filter parameters (d0-d5)
    vld1.u8         {d25}, [r0], r1
    vdup.8          d1, d14[4]
    vld1.u8         {d26}, [r0], r1
    vdup.8          d2, d15[0]
    vld1.u8         {d27}, [r0], r1
    vdup.8          d3, d15[4]
    vld1.u8         {d28}, [r0], r1
    vdup.8          d4, d16[0]
    vld1.u8         {d29}, [r0], r1
    vdup.8          d5, d16[4]
    vld1.u8         {d30}, [r0], r1

    vmull.u8        q3, d22, d0             ;(src_ptr[-2] * vp8_filter[0])
    vmull.u8        q4, d23, d0
    vmull.u8        q5, d24, d0
    vmull.u8        q6, d25, d0

    vmlsl.u8        q3, d23, d1             ;-(src_ptr[-1] * vp8_filter[1])
    vmlsl.u8        q4, d24, d1
    vmlsl.u8        q5, d25, d1
    vmlsl.u8        q6, d26, d1

    vmlsl.u8        q3, d26, d4             ;-(src_ptr[2] * vp8_filter[4])
    vmlsl.u8        q4, d27, d4
    vmlsl.u8        q5, d28, d4
    vmlsl.u8        q6, d29, d4

    vmlal.u8        q3, d24, d2             ;(src_ptr[0] * vp8_filter[2])
    vmlal.u8        q4, d25, d2
    vmlal.u8        q5, d26, d2
    vmlal.u8        q6, d27, d2

    vmlal.u8        q3, d27, d5             ;(src_ptr[3] * vp8_filter[5])
    vmlal.u8        q4, d28, d5
    vmlal.u8        q5, d29, d5
    vmlal.u8        q6, d30, d5

    vmull.u8        q7, d25, d3             ;(src_ptr[1] * vp8_filter[3])
    vmull.u8        q8, d26, d3
    vmull.u8        q9, d27, d3
    vmull.u8        q10, d28, d3

    vqadd.s16       q7, q3                  ;sum of all (src_data*filter_parameters)
    vqadd.s16       q8, q4
    vqadd.s16       q9, q5
    vqadd.s16       q10, q6

    vqrshrun.s16    d6, q7, #7              ;shift/round/saturate to u8
    vqrshrun.s16    d7, q8, #7
    vqrshrun.s16    d8, q9, #7
    vqrshrun.s16    d9, q10, #7

    vst1.u8         {d6}, [r4], r5          ;store result
    vst1.u8         {d7}, [r4], r5
    vst1.u8         {d8}, [r4], r5
    vst1.u8         {d9}, [r4], r5

    vpop            {d8-d15}                ;restore callee-saved NEON registers
    pop             {r4-r5,pc}

    ENDP

;-----------------

    END