;
;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

    EXPORT  |vp9_lpf_horizontal_16_neon|
    EXPORT  |vp9_lpf_vertical_16_neon|
    ARM

    AREA ||.text||, CODE, READONLY, ALIGN=2

; void vp9_lpf_horizontal_16_neon(uint8_t *s, int p,
;                                 const uint8_t *blimit,
;                                 const uint8_t *limit,
;                                 const uint8_t *thresh,
;                                 int count)
;
; Filters across a horizontal edge: for each group of 8 pixel columns it loads
; the 16 rows p7..q7 surrounding s, runs vp9_wide_mbfilter_neon on them and
; writes back only the rows that the selected filter path produced.
;
; r0    uint8_t *s,
; r1    int p, /* pitch */
; r2    const uint8_t *blimit,
; r3    const uint8_t *limit,
; sp    const uint8_t *thresh,
; sp+4  int count               ; number of 8-column groups to process
;
; Internal register use:
; r4    thresh pointer
; r7    path flags returned by vp9_wide_mbfilter_neon (bit 0, bit 1)
; r8    working row pointer
; r12   remaining count
; NOTE(review): the loop below is a do/while, so count == 0 is not handled;
; callers are presumably expected to pass count >= 1 -- confirm.
|vp9_lpf_horizontal_16_neon| PROC
    push        {r4-r8, lr}
    vpush       {d8-d15}
    ; 6 core registers (24 bytes) + 8 d registers (64 bytes) were pushed, so
    ; the first stack argument now lives at sp + 88.
    ldr         r4, [sp, #88]              ; load thresh
    ldr         r12, [sp, #92]             ; load count

h_count
    ; d16-d18 are reloaded on every iteration: the store paths below read
    ; filter results out of these same registers.
    vld1.8      {d16[]}, [r2]              ; load *blimit
    vld1.8      {d17[]}, [r3]              ; load *limit
    vld1.8      {d18[]}, [r4]              ; load *thresh

    sub         r8, r0, r1, lsl #3         ; r8 = s - 8 * pitch (row p7)

    vld1.u8     {d0}, [r8@64], r1          ; p7
    vld1.u8     {d1}, [r8@64], r1          ; p6
    vld1.u8     {d2}, [r8@64], r1          ; p5
    vld1.u8     {d3}, [r8@64], r1          ; p4
    vld1.u8     {d4}, [r8@64], r1          ; p3
    vld1.u8     {d5}, [r8@64], r1          ; p2
    vld1.u8     {d6}, [r8@64], r1          ; p1
    vld1.u8     {d7}, [r8@64], r1          ; p0
    vld1.u8     {d8}, [r8@64], r1          ; q0
    vld1.u8     {d9}, [r8@64], r1          ; q1
    vld1.u8     {d10}, [r8@64], r1         ; q2
    vld1.u8     {d11}, [r8@64], r1         ; q3
    vld1.u8     {d12}, [r8@64], r1         ; q4
    vld1.u8     {d13}, [r8@64], r1         ; q5
    vld1.u8     {d14}, [r8@64], r1         ; q6
    vld1.u8     {d15}, [r8@64], r1         ; q7

    bl          vp9_wide_mbfilter_neon

    tst         r7, #1
    beq         h_mbfilter

    ; flat && mask were not set for any of the channels. Just store the values
    ; from filter.
    sub         r8, r0, r1, lsl #1         ; r8 = s - 2 * pitch (row p1)

    vst1.u8     {d25}, [r8@64], r1         ; store op1
    vst1.u8     {d24}, [r8@64], r1         ; store op0
    vst1.u8     {d23}, [r8@64], r1         ; store oq0
    vst1.u8     {d26}, [r8@64], r1         ; store oq1

    b           h_next

h_mbfilter
    tst         r7, #2
    beq         h_wide_mbfilter

    ; flat2 was not set for any of the channels. Just store the values from
    ; mbfilter.
    sub         r8, r0, r1, lsl #1
    sub         r8, r8, r1                 ; r8 = s - 3 * pitch (row p2)

    vst1.u8     {d18}, [r8@64], r1         ; store op2
    vst1.u8     {d19}, [r8@64], r1         ; store op1
    vst1.u8     {d20}, [r8@64], r1         ; store op0
    vst1.u8     {d21}, [r8@64], r1         ; store oq0
    vst1.u8     {d22}, [r8@64], r1         ; store oq1
    vst1.u8     {d23}, [r8@64], r1         ; store oq2

    b           h_next

h_wide_mbfilter
    sub         r8, r0, r1, lsl #3
    add         r8, r8, r1                 ; r8 = s - 7 * pitch (row p6)

    vst1.u8     {d16}, [r8@64], r1         ; store op6
    vst1.u8     {d24}, [r8@64], r1         ; store op5
    vst1.u8     {d25}, [r8@64], r1         ; store op4
    vst1.u8     {d26}, [r8@64], r1         ; store op3
    vst1.u8     {d27}, [r8@64], r1         ; store op2
    vst1.u8     {d18}, [r8@64], r1         ; store op1
    vst1.u8     {d19}, [r8@64], r1         ; store op0
    vst1.u8     {d20}, [r8@64], r1         ; store oq0
    vst1.u8     {d21}, [r8@64], r1         ; store oq1
    vst1.u8     {d22}, [r8@64], r1         ; store oq2
    vst1.u8     {d23}, [r8@64], r1         ; store oq3
    vst1.u8     {d1}, [r8@64], r1          ; store oq4
    vst1.u8     {d2}, [r8@64], r1          ; store oq5
    vst1.u8     {d3}, [r8@64], r1          ; store oq6

h_next
    add         r0, r0, #8                 ; advance s to the next 8 columns
    subs        r12, r12, #1
    bne         h_count

    vpop        {d8-d15}
    pop         {r4-r8, pc}

    ENDP        ; |vp9_lpf_horizontal_16_neon|

; void vp9_lpf_vertical_16_neon(uint8_t *s, int p,
;                               const uint8_t *blimit,
;                               const uint8_t *limit,
;                               const uint8_t *thresh)
;
; Filters across a vertical edge for 8 rows: loads the 8 bytes on each side of
; s from 8 consecutive rows, transposes them into the p7..q7 register layout
; listed in the vp9_wide_mbfilter_neon header, runs that helper, then writes
; back only the columns that the selected filter path produced.
;
; r0    uint8_t *s,
; r1    int p, /* pitch */
; r2    const uint8_t *blimit,
; r3    const uint8_t *limit,
; sp    const uint8_t *thresh,
;
; Internal register use:
; r4    thresh pointer
; r7    path flags returned by vp9_wide_mbfilter_neon (bit 0, bit 1)
; r8    working pointer for the bytes to the left of s
|vp9_lpf_vertical_16_neon| PROC
    push        {r4-r8, lr}
    vpush       {d8-d15}
    ; 24 + 64 bytes were pushed, so the first stack argument is at sp + 88.
    ldr         r4, [sp, #88]              ; load thresh

    vld1.8      {d16[]}, [r2]              ; load *blimit
    vld1.8      {d17[]}, [r3]              ; load *limit
    vld1.8      {d18[]}, [r4]              ; load *thresh

    sub         r8, r0, #8                 ; r8 = s - 8 (left 8 bytes of a row)

    ; For each of the 8 rows: left half into d0-d7, right half into d8-d15.
    vld1.8      {d0}, [r8@64], r1
    vld1.8      {d8}, [r0@64], r1
    vld1.8      {d1}, [r8@64], r1
    vld1.8      {d9}, [r0@64], r1
    vld1.8      {d2}, [r8@64], r1
    vld1.8      {d10}, [r0@64], r1
    vld1.8      {d3}, [r8@64], r1
    vld1.8      {d11}, [r0@64], r1
    vld1.8      {d4}, [r8@64], r1
    vld1.8      {d12}, [r0@64], r1
    vld1.8      {d5}, [r8@64], r1
    vld1.8      {d13}, [r0@64], r1
    vld1.8      {d6}, [r8@64], r1
    vld1.8      {d14}, [r0@64], r1
    vld1.8      {d7}, [r8@64], r1
    vld1.8      {d15}, [r0@64], r1

    sub         r0, r0, r1, lsl #3         ; rewind r0 to the first row

    ; Transpose both 8x8 byte tiles (32-bit, then 16-bit, then 8-bit swaps).
    vtrn.32     q0, q2
    vtrn.32     q1, q3
    vtrn.32     q4, q6
    vtrn.32     q5, q7

    vtrn.16     q0, q1
    vtrn.16     q2, q3
    vtrn.16     q4, q5
    vtrn.16     q6, q7

    vtrn.8      d0, d1
    vtrn.8      d2, d3
    vtrn.8      d4, d5
    vtrn.8      d6, d7

    vtrn.8      d8, d9
    vtrn.8      d10, d11
    vtrn.8      d12, d13
    vtrn.8      d14, d15

    bl          vp9_wide_mbfilter_neon

    tst         r7, #1
    beq         v_mbfilter

    ; flat && mask were not set for any of the channels. Just store the values
    ; from filter.
    sub         r8, r0, #2                 ; r8 = s - 2 (column p1)

    ; Swap so that d23, d24, d25, d26 = op1, op0, oq0, oq1; vst4 then writes
    ; the four bytes of each row in memory order.
    vswp        d23, d25

    vst4.8      {d23[0], d24[0], d25[0], d26[0]}, [r8], r1
    vst4.8      {d23[1], d24[1], d25[1], d26[1]}, [r8], r1
    vst4.8      {d23[2], d24[2], d25[2], d26[2]}, [r8], r1
    vst4.8      {d23[3], d24[3], d25[3], d26[3]}, [r8], r1
    vst4.8      {d23[4], d24[4], d25[4], d26[4]}, [r8], r1
    vst4.8      {d23[5], d24[5], d25[5], d26[5]}, [r8], r1
    vst4.8      {d23[6], d24[6], d25[6], d26[6]}, [r8], r1
    vst4.8      {d23[7], d24[7], d25[7], d26[7]}, [r8], r1

    b           v_end

v_mbfilter
    tst         r7, #2
    beq         v_wide_mbfilter

    ; flat2 was not set for any of the channels. Just store the values from
    ; mbfilter.
    sub         r8, r0, #3                 ; r8 = s - 3 (column p2)

    ; Per row: op2, op1, op0 to the left of s, then oq0, oq1, oq2 from s.
    vst3.8      {d18[0], d19[0], d20[0]}, [r8], r1
    vst3.8      {d21[0], d22[0], d23[0]}, [r0], r1
    vst3.8      {d18[1], d19[1], d20[1]}, [r8], r1
    vst3.8      {d21[1], d22[1], d23[1]}, [r0], r1
    vst3.8      {d18[2], d19[2], d20[2]}, [r8], r1
    vst3.8      {d21[2], d22[2], d23[2]}, [r0], r1
    vst3.8      {d18[3], d19[3], d20[3]}, [r8], r1
    vst3.8      {d21[3], d22[3], d23[3]}, [r0], r1
    vst3.8      {d18[4], d19[4], d20[4]}, [r8], r1
    vst3.8      {d21[4], d22[4], d23[4]}, [r0], r1
    vst3.8      {d18[5], d19[5], d20[5]}, [r8], r1
    vst3.8      {d21[5], d22[5], d23[5]}, [r0], r1
    vst3.8      {d18[6], d19[6], d20[6]}, [r8], r1
    vst3.8      {d21[6], d22[6], d23[6]}, [r0], r1
    vst3.8      {d18[7], d19[7], d20[7]}, [r8], r1
    vst3.8      {d21[7], d22[7], d23[7]}, [r0], r1

    b           v_end

v_wide_mbfilter
    sub         r8, r0, #8                 ; r8 = s - 8 (left 8 bytes of a row)

    ; Transpose the left tile back into rows.
    ; Columns: d0, d16, d24, d25, d26, d27, d18, d19 = p7, op6 .. op0.
    vtrn.32     d0, d26
    vtrn.32     d16, d27
    vtrn.32     d24, d18
    vtrn.32     d25, d19

    vtrn.16     d0, d24
    vtrn.16     d16, d25
    vtrn.16     d26, d18
    vtrn.16     d27, d19

    vtrn.8      d0, d16
    vtrn.8      d24, d25
    vtrn.8      d26, d27
    vtrn.8      d18, d19

    ; Transpose the right tile back into rows.
    ; Columns: d20, d21, d22, d23, d1, d2, d3, d15 = oq0 .. oq6, q7.
    vtrn.32     d20, d1
    vtrn.32     d21, d2
    vtrn.32     d22, d3
    vtrn.32     d23, d15

    vtrn.16     d20, d22
    vtrn.16     d21, d23
    vtrn.16     d1, d3
    vtrn.16     d2, d15

    vtrn.8      d20, d21
    vtrn.8      d22, d23
    vtrn.8      d1, d2
    vtrn.8      d3, d15

    ; Write each row back: left 8 bytes via r8, right 8 bytes via r0.
    vst1.8      {d0}, [r8@64], r1
    vst1.8      {d20}, [r0@64], r1
    vst1.8      {d16}, [r8@64], r1
    vst1.8      {d21}, [r0@64], r1
    vst1.8      {d24}, [r8@64], r1
    vst1.8      {d22}, [r0@64], r1
    vst1.8      {d25}, [r8@64], r1
    vst1.8      {d23}, [r0@64], r1
    vst1.8      {d26}, [r8@64], r1
    vst1.8      {d1}, [r0@64], r1
    vst1.8      {d27}, [r8@64], r1
    vst1.8      {d2}, [r0@64], r1
    vst1.8      {d18}, [r8@64], r1
    vst1.8      {d3}, [r0@64], r1
    vst1.8      {d19}, [r8@64], r1
    vst1.8      {d15}, [r0@64], r1

v_end
    vpop        {d8-d15}
    pop         {r4-r8, pc}

    ENDP        ; |vp9_lpf_vertical_16_neon|

; void vp9_wide_mbfilter_neon();
; This is a helper function for the loopfilters. The individual functions do the
; necessary load, transpose (if necessary) and store.
287233d2500723e5594f3e7c70896ffeeef32b9c950ywan; 288233d2500723e5594f3e7c70896ffeeef32b9c950ywan; r0-r3 PRESERVE 289233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d16 blimit 290233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d17 limit 291233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d18 thresh 292233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d0 p7 293233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d1 p6 294233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d2 p5 295233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d3 p4 296233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d4 p3 297233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d5 p2 298233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d6 p1 299233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d7 p0 300233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d8 q0 301233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d9 q1 302233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d10 q2 303233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d11 q3 304233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d12 q4 305233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d13 q5 306233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d14 q6 307233d2500723e5594f3e7c70896ffeeef32b9c950ywan; d15 q7 308233d2500723e5594f3e7c70896ffeeef32b9c950ywan|vp9_wide_mbfilter_neon| PROC 309233d2500723e5594f3e7c70896ffeeef32b9c950ywan mov r7, #0 310233d2500723e5594f3e7c70896ffeeef32b9c950ywan 311233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; filter_mask 312233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d19, d4, d5 ; abs(p3 - p2) 313233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d20, d5, d6 ; abs(p2 - p1) 314233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d21, d6, d7 ; abs(p1 - p0) 315233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d22, d9, d8 ; abs(q1 - q0) 316233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d23, d10, d9 ; abs(q2 - q1) 317233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d24, d11, d10 ; abs(q3 - q2) 318233d2500723e5594f3e7c70896ffeeef32b9c950ywan 
319233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; only compare the largest value to limit 320233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d19, d19, d20 ; max(abs(p3 - p2), abs(p2 - p1)) 321233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d20, d21, d22 ; max(abs(p1 - p0), abs(q1 - q0)) 322233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d23, d23, d24 ; max(abs(q2 - q1), abs(q3 - q2)) 323233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d19, d19, d20 324233d2500723e5594f3e7c70896ffeeef32b9c950ywan 325233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d24, d7, d8 ; abs(p0 - q0) 326233d2500723e5594f3e7c70896ffeeef32b9c950ywan 327233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d19, d19, d23 328233d2500723e5594f3e7c70896ffeeef32b9c950ywan 329233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d23, d6, d9 ; a = abs(p1 - q1) 330233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqadd.u8 d24, d24, d24 ; b = abs(p0 - q0) * 2 331233d2500723e5594f3e7c70896ffeeef32b9c950ywan 332233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; abs () > limit 333233d2500723e5594f3e7c70896ffeeef32b9c950ywan vcge.u8 d19, d17, d19 334233d2500723e5594f3e7c70896ffeeef32b9c950ywan 335233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; flatmask4 336233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d25, d7, d5 ; abs(p0 - p2) 337233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d26, d8, d10 ; abs(q0 - q2) 338233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d27, d4, d7 ; abs(p3 - p0) 339233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d28, d11, d8 ; abs(q3 - q0) 340233d2500723e5594f3e7c70896ffeeef32b9c950ywan 341233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; only compare the largest value to thresh 342233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d25, d25, d26 ; max(abs(p0 - p2), abs(q0 - q2)) 343233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d26, d27, d28 ; max(abs(p3 - p0), abs(q3 - q0)) 344233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d25, d25, d26 
345233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d20, d20, d25 346233d2500723e5594f3e7c70896ffeeef32b9c950ywan 347233d2500723e5594f3e7c70896ffeeef32b9c950ywan vshr.u8 d23, d23, #1 ; a = a / 2 348233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqadd.u8 d24, d24, d23 ; a = b + a 349233d2500723e5594f3e7c70896ffeeef32b9c950ywan 350233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmov.u8 d30, #1 351233d2500723e5594f3e7c70896ffeeef32b9c950ywan vcge.u8 d24, d16, d24 ; (a > blimit * 2 + limit) * -1 352233d2500723e5594f3e7c70896ffeeef32b9c950ywan 353233d2500723e5594f3e7c70896ffeeef32b9c950ywan vcge.u8 d20, d30, d20 ; flat 354233d2500723e5594f3e7c70896ffeeef32b9c950ywan 355233d2500723e5594f3e7c70896ffeeef32b9c950ywan vand d19, d19, d24 ; mask 356233d2500723e5594f3e7c70896ffeeef32b9c950ywan 357233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; hevmask 358233d2500723e5594f3e7c70896ffeeef32b9c950ywan vcgt.u8 d21, d21, d18 ; (abs(p1 - p0) > thresh)*-1 359233d2500723e5594f3e7c70896ffeeef32b9c950ywan vcgt.u8 d22, d22, d18 ; (abs(q1 - q0) > thresh)*-1 360233d2500723e5594f3e7c70896ffeeef32b9c950ywan vorr d21, d21, d22 ; hev 361233d2500723e5594f3e7c70896ffeeef32b9c950ywan 362233d2500723e5594f3e7c70896ffeeef32b9c950ywan vand d16, d20, d19 ; flat && mask 363233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmov r5, r6, d16 364233d2500723e5594f3e7c70896ffeeef32b9c950ywan 365233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7) 366233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d22, d3, d7 ; abs(p4 - p0) 367233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d23, d12, d8 ; abs(q4 - q0) 368233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d24, d7, d2 ; abs(p0 - p5) 369233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d25, d8, d13 ; abs(q0 - q5) 370233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d26, d1, d7 ; abs(p6 - p0) 371233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d27, d14, d8 ; abs(q6 - q0) 
372233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d28, d0, d7 ; abs(p7 - p0) 373233d2500723e5594f3e7c70896ffeeef32b9c950ywan vabd.u8 d29, d15, d8 ; abs(q7 - q0) 374233d2500723e5594f3e7c70896ffeeef32b9c950ywan 375233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; only compare the largest value to thresh 376233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d22, d22, d23 ; max(abs(p4 - p0), abs(q4 - q0)) 377233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d23, d24, d25 ; max(abs(p0 - p5), abs(q0 - q5)) 378233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d24, d26, d27 ; max(abs(p6 - p0), abs(q6 - q0)) 379233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d25, d28, d29 ; max(abs(p7 - p0), abs(q7 - q0)) 380233d2500723e5594f3e7c70896ffeeef32b9c950ywan 381233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d26, d22, d23 382233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d27, d24, d25 383233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmax.u8 d23, d26, d27 384233d2500723e5594f3e7c70896ffeeef32b9c950ywan 385233d2500723e5594f3e7c70896ffeeef32b9c950ywan vcge.u8 d18, d30, d23 ; flat2 386233d2500723e5594f3e7c70896ffeeef32b9c950ywan 387233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmov.u8 d22, #0x80 388233d2500723e5594f3e7c70896ffeeef32b9c950ywan 389233d2500723e5594f3e7c70896ffeeef32b9c950ywan orrs r5, r5, r6 ; Check for 0 390233d2500723e5594f3e7c70896ffeeef32b9c950ywan orreq r7, r7, #1 ; Only do filter branch 391233d2500723e5594f3e7c70896ffeeef32b9c950ywan 392233d2500723e5594f3e7c70896ffeeef32b9c950ywan vand d17, d18, d16 ; flat2 && flat && mask 393233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmov r5, r6, d17 394233d2500723e5594f3e7c70896ffeeef32b9c950ywan 395233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; mbfilter() function 396233d2500723e5594f3e7c70896ffeeef32b9c950ywan 397233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; filter() function 398233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; convert to signed 399233d2500723e5594f3e7c70896ffeeef32b9c950ywan veor d23, 
d8, d22 ; qs0 400233d2500723e5594f3e7c70896ffeeef32b9c950ywan veor d24, d7, d22 ; ps0 401233d2500723e5594f3e7c70896ffeeef32b9c950ywan veor d25, d6, d22 ; ps1 402233d2500723e5594f3e7c70896ffeeef32b9c950ywan veor d26, d9, d22 ; qs1 403233d2500723e5594f3e7c70896ffeeef32b9c950ywan 404233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmov.u8 d27, #3 405233d2500723e5594f3e7c70896ffeeef32b9c950ywan 406233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.s8 d28, d23, d24 ; ( qs0 - ps0) 407233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1) 408233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0) 409233d2500723e5594f3e7c70896ffeeef32b9c950ywan vand d29, d29, d21 ; filter &= hev 410233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0) 411233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmov.u8 d29, #4 412233d2500723e5594f3e7c70896ffeeef32b9c950ywan 413233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; filter = clamp(filter + 3 * ( qs0 - ps0)) 414233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqmovn.s16 d28, q15 415233d2500723e5594f3e7c70896ffeeef32b9c950ywan 416233d2500723e5594f3e7c70896ffeeef32b9c950ywan vand d28, d28, d19 ; filter &= mask 417233d2500723e5594f3e7c70896ffeeef32b9c950ywan 418233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqadd.s8 d30, d28, d27 ; filter2 = clamp(filter+3) 419233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqadd.s8 d29, d28, d29 ; filter1 = clamp(filter+4) 420233d2500723e5594f3e7c70896ffeeef32b9c950ywan vshr.s8 d30, d30, #3 ; filter2 >>= 3 421233d2500723e5594f3e7c70896ffeeef32b9c950ywan vshr.s8 d29, d29, #3 ; filter1 >>= 3 422233d2500723e5594f3e7c70896ffeeef32b9c950ywan 423233d2500723e5594f3e7c70896ffeeef32b9c950ywan 424233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2) 425233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqsub.s8 d23, d23, d29 ; oq0 = clamp(qs0 - filter1) 
426233d2500723e5594f3e7c70896ffeeef32b9c950ywan 427233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; outer tap adjustments: ++filter1 >> 1 428233d2500723e5594f3e7c70896ffeeef32b9c950ywan vrshr.s8 d29, d29, #1 429233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbic d29, d29, d21 ; filter &= ~hev 430233d2500723e5594f3e7c70896ffeeef32b9c950ywan 431233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter) 432233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter) 433233d2500723e5594f3e7c70896ffeeef32b9c950ywan 434233d2500723e5594f3e7c70896ffeeef32b9c950ywan veor d24, d24, d22 ; *f_op0 = u^0x80 435233d2500723e5594f3e7c70896ffeeef32b9c950ywan veor d23, d23, d22 ; *f_oq0 = u^0x80 436233d2500723e5594f3e7c70896ffeeef32b9c950ywan veor d25, d25, d22 ; *f_op1 = u^0x80 437233d2500723e5594f3e7c70896ffeeef32b9c950ywan veor d26, d26, d22 ; *f_oq1 = u^0x80 438233d2500723e5594f3e7c70896ffeeef32b9c950ywan 439233d2500723e5594f3e7c70896ffeeef32b9c950ywan tst r7, #1 440233d2500723e5594f3e7c70896ffeeef32b9c950ywan bxne lr 441233d2500723e5594f3e7c70896ffeeef32b9c950ywan 442233d2500723e5594f3e7c70896ffeeef32b9c950ywan orrs r5, r5, r6 ; Check for 0 443233d2500723e5594f3e7c70896ffeeef32b9c950ywan orreq r7, r7, #2 ; Only do mbfilter branch 444233d2500723e5594f3e7c70896ffeeef32b9c950ywan 445233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; mbfilter flat && mask branch 446233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; TODO(fgalligan): Can I decrease the cycles shifting to consective d's 447233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; and using vibt on the q's? 
448233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmov.u8 d29, #2 449233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q15, d7, d8 ; op2 = p0 + q0 450233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmlal.u8 q15, d4, d27 ; op2 = p0 + q0 + p3 * 3 451233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmlal.u8 q15, d5, d29 ; op2 = p0 + q0 + p3 * 3 + p2 * 2 452233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q10, d4, d5 453233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddw.u8 q15, d6 ; op2=p1 + p0 + q0 + p3 * 3 + p2 *2 454233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d6, d9 455233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d18, q15, #3 ; r_op2 456233d2500723e5594f3e7c70896ffeeef32b9c950ywan 457233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.i16 q15, q10 458233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q10, d4, d6 459233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q15, q14 460233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d7, d10 461233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d19, q15, #3 ; r_op1 462233d2500723e5594f3e7c70896ffeeef32b9c950ywan 463233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.i16 q15, q10 464233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q15, q14 465233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d8, d11 466233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d20, q15, #3 ; r_op0 467233d2500723e5594f3e7c70896ffeeef32b9c950ywan 468233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsubw.u8 q15, d4 ; oq0 = op0 - p3 469233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsubw.u8 q15, d7 ; oq0 -= p0 470233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q15, q14 471233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d9, d11 472233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d21, q15, #3 ; r_oq0 473233d2500723e5594f3e7c70896ffeeef32b9c950ywan 474233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsubw.u8 q15, d5 ; oq1 = oq0 - p2 
475233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsubw.u8 q15, d8 ; oq1 -= q0 476233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q15, q14 477233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d10, d11 478233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d22, q15, #3 ; r_oq1 479233d2500723e5594f3e7c70896ffeeef32b9c950ywan 480233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsubw.u8 q15, d6 ; oq2 = oq0 - p1 481233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsubw.u8 q15, d9 ; oq2 -= q1 482233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q15, q14 483233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d27, q15, #3 ; r_oq2 484233d2500723e5594f3e7c70896ffeeef32b9c950ywan 485233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; Filter does not set op2 or oq2, so use p2 and q2. 486233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d18, d5, d16 ; t_op2 |= p2 & ~(flat & mask) 487233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d19, d25, d16 ; t_op1 |= f_op1 & ~(flat & mask) 488233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d20, d24, d16 ; t_op0 |= f_op0 & ~(flat & mask) 489233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d21, d23, d16 ; t_oq0 |= f_oq0 & ~(flat & mask) 490233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d22, d26, d16 ; t_oq1 |= f_oq1 & ~(flat & mask) 491233d2500723e5594f3e7c70896ffeeef32b9c950ywan 492233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbit d23, d27, d16 ; t_oq2 |= r_oq2 & (flat & mask) 493233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d23, d10, d16 ; t_oq2 |= q2 & ~(flat & mask) 494233d2500723e5594f3e7c70896ffeeef32b9c950ywan 495233d2500723e5594f3e7c70896ffeeef32b9c950ywan tst r7, #2 496233d2500723e5594f3e7c70896ffeeef32b9c950ywan bxne lr 497233d2500723e5594f3e7c70896ffeeef32b9c950ywan 498233d2500723e5594f3e7c70896ffeeef32b9c950ywan ; wide_mbfilter flat2 && flat && mask branch 499233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmov.u8 d16, #7 500233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q15, d7, d8 ; op6 = p0 + q0 
501233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q12, d2, d3 502233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q13, d4, d5 503233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d1, d6 504233d2500723e5594f3e7c70896ffeeef32b9c950ywan vmlal.u8 q15, d0, d16 ; op6 += p7 * 3 505233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q12, q13 506233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q15, q14 507233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d2, d9 508233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q15, q12 509233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q12, d0, d1 510233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddw.u8 q15, d1 511233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q13, d0, d2 512233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q14, q15, q14 513233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d16, q15, #4 ; w_op6 514233d2500723e5594f3e7c70896ffeeef32b9c950ywan 515233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.i16 q15, q14, q12 516233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d3, d10 517233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d24, q15, #4 ; w_op5 518233d2500723e5594f3e7c70896ffeeef32b9c950ywan 519233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.i16 q15, q13 520233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q13, d0, d3 521233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q15, q14 522233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d4, d11 523233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d25, q15, #4 ; w_op4 524233d2500723e5594f3e7c70896ffeeef32b9c950ywan 525233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q15, q14 526233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d0, d4 527233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.i16 q15, q13 528233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.i16 q14, q15, q14 529233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d26, q15, #4 ; w_op3 
530233d2500723e5594f3e7c70896ffeeef32b9c950ywan 531233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddw.u8 q15, q14, d5 ; op2 += p2 532233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d0, d5 533233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddw.u8 q15, d12 ; op2 += q4 534233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d26, d4, d17 ; op3 |= p3 & ~(f2 & f & m) 535233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d27, q15, #4 ; w_op2 536233d2500723e5594f3e7c70896ffeeef32b9c950ywan 537233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.i16 q15, q14 538233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d0, d6 539233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddw.u8 q15, d6 ; op1 += p1 540233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddw.u8 q15, d13 ; op1 += q5 541233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d27, d18, d17 ; op2 |= t_op2 & ~(f2 & f & m) 542233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d18, q15, #4 ; w_op1 543233d2500723e5594f3e7c70896ffeeef32b9c950ywan 544233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.i16 q15, q14 545233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d0, d7 546233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddw.u8 q15, d7 ; op0 += p0 547233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddw.u8 q15, d14 ; op0 += q6 548233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d18, d19, d17 ; op1 |= t_op1 & ~(f2 & f & m) 549233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d19, q15, #4 ; w_op0 550233d2500723e5594f3e7c70896ffeeef32b9c950ywan 551233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.i16 q15, q14 552233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d1, d8 553233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddw.u8 q15, d8 ; oq0 += q0 554233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddw.u8 q15, d15 ; oq0 += q7 555233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d19, d20, d17 ; op0 |= t_op0 & ~(f2 & f & m) 556233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d20, q15, #4 ; 
w_oq0 557233d2500723e5594f3e7c70896ffeeef32b9c950ywan 558233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.i16 q15, q14 559233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d2, d9 560233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddw.u8 q15, d9 ; oq1 += q1 561233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q4, d10, d15 562233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddw.u8 q15, d15 ; oq1 += q7 563233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d20, d21, d17 ; oq0 |= t_oq0 & ~(f2 & f & m) 564233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d21, q15, #4 ; w_oq1 565233d2500723e5594f3e7c70896ffeeef32b9c950ywan 566233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.i16 q15, q14 567233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d3, d10 568233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q15, q4 569233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q4, d11, d15 570233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d21, d22, d17 ; oq1 |= t_oq1 & ~(f2 & f & m) 571233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d22, q15, #4 ; w_oq2 572233d2500723e5594f3e7c70896ffeeef32b9c950ywan 573233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.i16 q15, q14 574233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d4, d11 575233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q15, q4 576233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q4, d12, d15 577233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d22, d23, d17 ; oq2 |= t_oq2 & ~(f2 & f & m) 578233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d23, q15, #4 ; w_oq3 579233d2500723e5594f3e7c70896ffeeef32b9c950ywan 580233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.i16 q15, q14 581233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d5, d12 582233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q15, q4 583233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q4, d13, d15 584233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d16, d1, d17 ; op6 |= p6 & ~(f2 & f & 
m) 585233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d1, q15, #4 ; w_oq4 586233d2500723e5594f3e7c70896ffeeef32b9c950ywan 587233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.i16 q15, q14 588233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q14, d6, d13 589233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q15, q4 590233d2500723e5594f3e7c70896ffeeef32b9c950ywan vaddl.u8 q4, d14, d15 591233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d24, d2, d17 ; op5 |= p5 & ~(f2 & f & m) 592233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d2, q15, #4 ; w_oq5 593233d2500723e5594f3e7c70896ffeeef32b9c950ywan 594233d2500723e5594f3e7c70896ffeeef32b9c950ywan vsub.i16 q15, q14 595233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d25, d3, d17 ; op4 |= p4 & ~(f2 & f & m) 596233d2500723e5594f3e7c70896ffeeef32b9c950ywan vadd.i16 q15, q4 597233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d23, d11, d17 ; oq3 |= q3 & ~(f2 & f & m) 598233d2500723e5594f3e7c70896ffeeef32b9c950ywan vqrshrn.u16 d3, q15, #4 ; w_oq6 599233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d1, d12, d17 ; oq4 |= q4 & ~(f2 & f & m) 600233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d2, d13, d17 ; oq5 |= q5 & ~(f2 & f & m) 601233d2500723e5594f3e7c70896ffeeef32b9c950ywan vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m) 602233d2500723e5594f3e7c70896ffeeef32b9c950ywan 603233d2500723e5594f3e7c70896ffeeef32b9c950ywan bx lr 604233d2500723e5594f3e7c70896ffeeef32b9c950ywan ENDP ; |vp9_wide_mbfilter_neon| 605233d2500723e5594f3e7c70896ffeeef32b9c950ywan 606233d2500723e5594f3e7c70896ffeeef32b9c950ywan END 607