;
;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

    ; Syntax: ARM armasm (RVCT), ARM (A32) instruction set with NEON.
    ;
    ; The NEON code paths in this file are only valid when:
    ; x_step_q4 == 16 (y_step_q4 == 16 for the vertical filter); any other
    ;                 step is handed to the C implementation by a tail call
    ; w%4 == 0
    ; h%4 == 0
    ; taps == 8
    ; VP9_FILTER_WEIGHT == 128
    ; VP9_FILTER_SHIFT  == 7
    ;
    ; Both loops test their counters after the body (subs/bgt), so a 4x4
    ; block is always processed at least once.

    EXPORT  |vp9_convolve8_horiz_neon|
    EXPORT  |vp9_convolve8_vert_neon|
    IMPORT  |vp9_convolve8_horiz_c|
    IMPORT  |vp9_convolve8_vert_c|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

    ; Multiply and accumulate by q0.
    ;
    ; $dst (a q register, 4 x s32) = sum over k = 0..7 of $src<k> * tap[k],
    ; where $src<k> are d registers holding 4 x s16 each and the eight s16
    ; taps are the lanes of q0 (d0[0..3], d1[0..3]).
    ; Overwrites $dst only; q0 is read, not modified.
    MACRO
    MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7
    vmull.s16       $dst, $src0, d0[0]
    vmlal.s16       $dst, $src1, d0[1]
    vmlal.s16       $dst, $src2, d0[2]
    vmlal.s16       $dst, $src3, d0[3]
    vmlal.s16       $dst, $src4, d1[0]
    vmlal.s16       $dst, $src5, d1[1]
    vmlal.s16       $dst, $src6, d1[2]
    vmlal.s16       $dst, $src7, d1[3]
    MEND

; Arguments shared by both functions (stack offsets are relative to sp at
; function entry, i.e. before the push):
;
; r0        const uint8_t *src
; r1        int src_stride
; r2        uint8_t *dst
; r3        int dst_stride
; sp[0]     const int16_t *filter_x
; sp[4]     int x_step_q4
; sp[8]     const int16_t *filter_y
; sp[12]    int y_step_q4
; sp[16]    int w
; sp[20]    int h

;-----------------------------------------------------------------------
; vp9_convolve8_horiz_neon
;
; 8-tap horizontal filter. filter_y and y_step_q4 are unused.
;
; Register roles after setup:
;   r0   source pointer (starts at src - 3)
;   r2   destination pointer
;   r4   -dst_stride * 3 + 4   (rewind 3 rows, advance 4 columns)
;   r5   filter_x pointer, then reused as prefetch base inside the loop
;   r6   remaining width in the current band of 4 rows
;   r7   remaining height
;   r8   -src_stride * 3 + 4   (rewind 3 rows, advance 4 columns)
;   r9   src_stride * 4 - w - 7 (move src to the start of the next band)
;   r10  copy of w used to reload r6
;   r12  dst_stride * 4 - w    (move dst to the start of the next band)
;
; NEON layout inside loop_horiz: each d register holds one source column
; for the four rows of the band, widened to 16 bits. In tap order the
; columns live in d16, d17, d20, d22, d18, d19, d23, followed by the four
; freshly loaded columns in d24, d26, d27, d25.
;
; NEON registers written: q0-q2, q8-q15.
;-----------------------------------------------------------------------
|vp9_convolve8_horiz_neon| PROC
    ldr             r12, [sp, #4]           ; x_step_q4
    cmp             r12, #16
    bne             vp9_convolve8_horiz_c   ; tail call: nothing pushed yet

    push            {r4-r10, lr}            ; 32 bytes

    sub             r0, r0, #3              ; adjust for taps

    ldr             r5, [sp, #32]           ; filter_x
    ldr             r6, [sp, #48]           ; w
    ldr             r7, [sp, #52]           ; h

    vld1.s16        {q0}, [r5]              ; filter_x

    sub             r8, r1, r1, lsl #2      ; -src_stride * 3
    add             r8, r8, #4              ; -src_stride * 3 + 4

    sub             r4, r3, r3, lsl #2      ; -dst_stride * 3
    add             r4, r4, #4              ; -dst_stride * 3 + 4

    rsb             r9, r6, r1, lsl #2      ; reset src for outer loop
    sub             r9, r9, #7
    rsb             r12, r6, r3, lsl #2     ; reset dst for outer loop

    mov             r10, r6                 ; w loop counter

loop_horiz_v
    ; Prime the band: 8 bytes from each of 4 rows, r0 ends 4 columns on.
    vld1.8          {d24}, [r0], r1
    vld1.8          {d25}, [r0], r1
    vld1.8          {d26}, [r0], r1
    vld1.8          {d27}, [r0], r8

    ; transpose so that each d register holds two source columns
    vtrn.16         q12, q13
    vtrn.8          d24, d25
    vtrn.8          d26, d27

    pld             [r0, r1, lsl #2]        ; prefetch the next band

    ; extract to s16: one column per d register
    vmovl.u8        q8, d24
    vmovl.u8        q9, d25
    vmovl.u8        q10, d26
    vmovl.u8        q11, d27

    ; save a few instructions in the inner loop
    ; (put the columns into the order the end-of-loop vmovs maintain; the
    ; eighth column is dropped here and reloaded by the first iteration)
    vswp            d17, d18
    vmov            d23, d21

    add             r0, r0, #3              ; r0 = src + 4 of this band

loop_horiz
    add             r5, r0, #64             ; prefetch base

    ; next 4 bytes of each row, replicated into both halves
    vld1.32         {d28[]}, [r0], r1
    vld1.32         {d29[]}, [r0], r1
    vld1.32         {d31[]}, [r0], r1
    vld1.32         {d30[]}, [r0], r8

    pld             [r5]

    ; transpose the 4x4 block of new bytes
    vtrn.16         d28, d31
    vtrn.16         d29, d30
    vtrn.8          d28, d29
    vtrn.8          d31, d30

    pld             [r5, r1]

    ; extract to s16
    vtrn.32         q14, q15
    vmovl.u8        q12, d28
    vmovl.u8        q13, d29

    pld             [r5, r1, lsl #1]

    ; src[] * filter_x: one MULTIPLY_BY_Q0 per output column
    MULTIPLY_BY_Q0  q1,  d16, d17, d20, d22, d18, d19, d23, d24
    MULTIPLY_BY_Q0  q2,  d17, d20, d22, d18, d19, d23, d24, d26
    MULTIPLY_BY_Q0  q14, d20, d22, d18, d19, d23, d24, d26, d27
    MULTIPLY_BY_Q0  q15, d22, d18, d19, d23, d24, d26, d27, d25

    pld             [r5, -r8]

    ; += 64 >> 7 (rounding shift, saturating narrow s32 -> u16)
    vqrshrun.s32    d2, q1, #7
    vqrshrun.s32    d3, q2, #7
    vqrshrun.s32    d4, q14, #7
    vqrshrun.s32    d5, q15, #7

    ; saturate (narrow u16 -> u8)
    vqmovn.u16      d2, q1
    vqmovn.u16      d3, q2

    ; transpose back to row order: d2 = rows 0 and 2, d3 = rows 1 and 3
    vtrn.16         d2, d3
    vtrn.32         d2, d3
    vtrn.8          d2, d3

    ; store 4 bytes per row; the @32 qualifier requires 4-byte aligned dst
    vst1.u32        {d2[0]}, [r2@32], r3
    vst1.u32        {d3[0]}, [r2@32], r3
    vst1.u32        {d2[1]}, [r2@32], r3
    vst1.u32        {d3[1]}, [r2@32], r4

    ; slide the column window 4 columns to the right
    vmov            q8,  q9
    vmov            d20, d23
    vmov            q11, q12
    vmov            q9,  q13

    subs            r6, r6, #4              ; w -= 4
    bgt             loop_horiz

    ; outer loop
    mov             r6, r10                 ; restore w counter
    add             r0, r0, r9              ; src += src_stride * 4 - w - 7
    add             r2, r2, r12             ; dst += dst_stride * 4 - w
    subs            r7, r7, #4              ; h -= 4
    bgt             loop_horiz_v

    pop             {r4-r10, pc}

    ENDP

;-----------------------------------------------------------------------
; vp9_convolve8_vert_neon
;
; 8-tap vertical filter. filter_x and x_step_q4 are unused.
;
; Register roles after setup:
;   r0   top of the current 4-pixel-wide column (src - 3 * src_stride)
;   r1   src_stride * 2
;   r2   top of the current destination column
;   r3   dst_stride * 2
;   r4   source pointer walking every other row, starting at r0
;   r7   source pointer walking every other row, starting one row below r4
;   r5   destination pointer for output rows 0 and 2 of each 4x4 block
;   r8   destination pointer for output rows 1 and 3 of each 4x4 block
;   r6   remaining width
;   lr   h
;   r12  remaining height in the current column
;
; NEON layout inside loop_vert: each d register holds 4 pixels of one
; source row widened to 16 bits. Rows 0..6 are in d16..d22; the four newly
; loaded rows 7, 8, 9, 10 are in d24, d26, d27, d25.
;
; NEON registers written: q0-q2, q8-q15.
;-----------------------------------------------------------------------
|vp9_convolve8_vert_neon| PROC
    ldr             r12, [sp, #12]          ; y_step_q4
    cmp             r12, #16
    bne             vp9_convolve8_vert_c    ; tail call: nothing pushed yet

    push            {r4-r8, lr}             ; 24 bytes

    ; adjust for taps
    sub             r0, r0, r1
    sub             r0, r0, r1, lsl #1      ; src -= 3 * src_stride

    ldr             r4, [sp, #32]           ; filter_y
    ldr             r6, [sp, #40]           ; w
    ldr             lr, [sp, #44]           ; h

    vld1.s16        {q0}, [r4]              ; filter_y

    lsl             r1, r1, #1              ; r1 = 2 * src_stride
    lsl             r3, r3, #1              ; r3 = 2 * dst_stride

loop_vert_h
    mov             r4, r0
    add             r7, r0, r1, asr #1      ; r0 + src_stride
    mov             r5, r2
    add             r8, r2, r3, asr #1      ; r2 + dst_stride
    mov             r12, lr                 ; h loop counter

    ; prime the column with the first 7 rows, 4 bytes each
    vld1.u32        {d16[0]}, [r4], r1
    vld1.u32        {d16[1]}, [r7], r1
    vld1.u32        {d18[0]}, [r4], r1
    vld1.u32        {d18[1]}, [r7], r1
    vld1.u32        {d20[0]}, [r4], r1
    vld1.u32        {d20[1]}, [r7], r1
    vld1.u32        {d22[0]}, [r4], r1

    ; extract to s16
    vmovl.u8        q8, d16
    vmovl.u8        q9, d18
    vmovl.u8        q10, d20
    vmovl.u8        q11, d22

loop_vert
    ; always process a 4x4 block at a time
    vld1.u32        {d24[0]}, [r7], r1
    vld1.u32        {d26[0]}, [r4], r1
    vld1.u32        {d26[1]}, [r7], r1
    vld1.u32        {d24[1]}, [r4], r1

    ; extract to s16
    vmovl.u8        q12, d24
    vmovl.u8        q13, d26

    pld             [r5]
    pld             [r8]

    ; src[] * filter_y: one MULTIPLY_BY_Q0 per output row
    MULTIPLY_BY_Q0  q1,  d16, d17, d18, d19, d20, d21, d22, d24

    pld             [r5, r3]
    pld             [r8, r3]

    MULTIPLY_BY_Q0  q2,  d17, d18, d19, d20, d21, d22, d24, d26

    pld             [r7]
    pld             [r4]

    MULTIPLY_BY_Q0  q14, d18, d19, d20, d21, d22, d24, d26, d27

    pld             [r7, r1]
    pld             [r4, r1]

    MULTIPLY_BY_Q0  q15, d19, d20, d21, d22, d24, d26, d27, d25

    ; += 64 >> 7 (rounding shift, saturating narrow s32 -> u16)
    vqrshrun.s32    d2, q1, #7
    vqrshrun.s32    d3, q2, #7
    vqrshrun.s32    d4, q14, #7
    vqrshrun.s32    d5, q15, #7

    ; saturate (narrow u16 -> u8): d2 = rows 0 and 1, d3 = rows 2 and 3
    vqmovn.u16      d2, q1
    vqmovn.u16      d3, q2

    ; store 4 bytes per row; the @32 qualifier requires 4-byte aligned dst
    vst1.u32        {d2[0]}, [r5@32], r3
    vst1.u32        {d2[1]}, [r8@32], r3
    vst1.u32        {d3[0]}, [r5@32], r3
    vst1.u32        {d3[1]}, [r8@32], r3

    ; slide the row window down by 4 rows
    vmov            q8, q10
    vmov            d18, d22
    vmov            d19, d24
    vmov            q10, q13
    vmov            d22, d25

    subs            r12, r12, #4            ; h -= 4
    bgt             loop_vert

    ; outer loop
    add             r0, r0, #4
    add             r2, r2, #4
    subs            r6, r6, #4              ; w -= 4
    bgt             loop_vert_h

    pop             {r4-r8, pc}

    ENDP
    END