;
;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

    EXPORT  |vpx_lpf_horizontal_8_neon|
    EXPORT  |vpx_lpf_horizontal_8_dual_neon|
    EXPORT  |vpx_lpf_vertical_8_neon|
    EXPORT  |vpx_lpf_vertical_8_dual_neon|
    ARM

    AREA ||.text||, CODE, READONLY, ALIGN=2

; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
;
; void vpx_lpf_horizontal_8_neon(uint8_t *s, int p,
;                                const uint8_t *blimit,
;                                const uint8_t *limit,
;                                const uint8_t *thresh)
;
; Filters 8 pixels across a horizontal edge: loads the four rows above s
; (p3..p0) and the four rows starting at s (q0..q3), runs
; vpx_mbloop_filter_neon, and writes back the six rows op2..oq2.
;
; r0    uint8_t *s,
; r1    int p, /* pitch */
; r2    const uint8_t *blimit,
; r3    const uint8_t *limit,
; sp    const uint8_t *thresh,
|vpx_lpf_horizontal_8_neon| PROC
    ; r4/r5 are written by vpx_mbloop_filter_neon; lr is clobbered by bl.
    push        {r4-r5, lr}

    vld1.8      {d0[]}, [r2]               ; duplicate *blimit
    ; Three registers were pushed, so the first stack argument is at sp + 12.
    ldr         r2, [sp, #12]              ; load thresh
    ; Two pointers (r2, r3) walk alternate rows, so each steps by 2 * pitch.
    add         r1, r1, r1                 ; double pitch

    vld1.8      {d1[]}, [r3]               ; duplicate *limit
    vld1.8      {d2[]}, [r2]               ; duplicate *thresh

    sub         r3, r0, r1, lsl #1         ; r3 = s - 4 * pitch (row p3)
    add         r2, r3, r1, lsr #1         ; r2 = s - 3 * pitch (row p2)

    vld1.u8     {d3}, [r3@64], r1          ; p3
    vld1.u8     {d4}, [r2@64], r1          ; p2
    vld1.u8     {d5}, [r3@64], r1          ; p1
    vld1.u8     {d6}, [r2@64], r1          ; p0
    vld1.u8     {d7}, [r3@64], r1          ; q0
    vld1.u8     {d16}, [r2@64], r1         ; q1
    vld1.u8     {d17}, [r3@64]             ; q2 (no post-increment)
    vld1.u8     {d18}, [r2@64], r1         ; q3

    ; Rewind both pointers to the first rows that will be stored.
    sub         r3, r3, r1, lsl #1         ; r3 = s - 2 * pitch (row op1)
    sub         r2, r2, r1, lsl #2         ; r2 = s - 3 * pitch (row op2)

    bl          vpx_mbloop_filter_neon

    vst1.u8     {d0}, [r2@64], r1          ; store op2
    vst1.u8     {d1}, [r3@64], r1          ; store op1
    vst1.u8     {d2}, [r2@64], r1          ; store op0
    vst1.u8     {d3}, [r3@64], r1          ; store oq0
    vst1.u8     {d4}, [r2@64], r1          ; store oq1
    vst1.u8     {d5}, [r3@64], r1          ; store oq2

    pop         {r4-r5, pc}

    ENDP        ; |vpx_lpf_horizontal_8_neon|

;void vpx_lpf_horizontal_8_dual_neon(uint8_t *s,
;                                    int p,
;                                    const uint8_t *blimit0,
;                                    const uint8_t *limit0,
;                                    const uint8_t *thresh0,
;                                    const uint8_t *blimit1,
;                                    const uint8_t *limit1,
;                                    const uint8_t *thresh1)
;
; Runs vpx_lpf_horizontal_8_neon twice: once at s with the "0" parameters and
; once at s + 8 with the "1" parameters. The second run is a tail call.
;
; r0      uint8_t *s,
; r1      int p, /* pitch */
; r2      const uint8_t *blimit0,
; r3      const uint8_t *limit0,
; sp      const uint8_t *thresh0,
; sp + 4  const uint8_t *blimit1,
; sp + 8  const uint8_t *limit1,
; sp + 12 const uint8_t *thresh1,
|vpx_lpf_horizontal_8_dual_neon| PROC
    ; Save s and pitch for the second pass, and lr because of the bl below.
    push        {r0-r1, lr}
    ; The callee expects thresh on top of the stack at entry, so copy thresh0
    ; (now at sp + 12 after the push above) to a fresh slot.
    ldr         lr, [sp, #12]
    push        {lr}                       ; thresh0
    bl          vpx_lpf_horizontal_8_neon

    ; Offsets below include the 16 bytes pushed by this function.
    ldr         r2, [sp, #20]              ; blimit1
    ldr         r3, [sp, #24]              ; limit1
    ldr         lr, [sp, #28]
    ; Overwrite the incoming thresh0 slot so that, once sp is restored, the
    ; top of the stack holds thresh1 for the tail call.
    str         lr, [sp, #16]              ; thresh1
    add         sp, #4                     ; drop the thresh0 copy
    pop         {r0-r1, lr}
    add         r0, #8                     ; s + 8
    b           vpx_lpf_horizontal_8_neon
    ENDP        ; |vpx_lpf_horizontal_8_dual_neon|

; void vpx_lpf_vertical_8_neon(uint8_t *s,
;                              int pitch,
;                              const uint8_t *blimit,
;                              const uint8_t *limit,
;                              const uint8_t *thresh)
;
; Filters 8 rows across a vertical edge: loads 8 bytes (s - 4 .. s + 3) from
; each of 8 rows, transposes them so each d register holds one pixel column,
; runs vpx_mbloop_filter_neon, and scatters the six output columns back.
;
; r0    uint8_t *s,
; r1    int pitch,
; r2    const uint8_t *blimit,
; r3    const uint8_t *limit,
; sp    const uint8_t *thresh,
|vpx_lpf_vertical_8_neon| PROC
    ; r4/r5 are written by vpx_mbloop_filter_neon; lr is clobbered by bl.
    push        {r4-r5, lr}

    vld1.8      {d0[]}, [r2]              ; duplicate *blimit
    vld1.8      {d1[]}, [r3]              ; duplicate *limit

    ; Three registers were pushed, so the first stack argument is at sp + 12.
    ldr         r3, [sp, #12]             ; load thresh
    sub         r2, r0, #4                ; r2 = s - 4 (first of 8 columns)

    vld1.8      {d2[]}, [r3]              ; duplicate *thresh

    vld1.u8     {d3}, [r2], r1            ; load 8 rows of 8 bytes each
    vld1.u8     {d4}, [r2], r1
    vld1.u8     {d5}, [r2], r1
    vld1.u8     {d6}, [r2], r1
    vld1.u8     {d7}, [r2], r1
    vld1.u8     {d16}, [r2], r1
    vld1.u8     {d17}, [r2], r1
    vld1.u8     {d18}, [r2]

    ; Transpose the 8x8 byte block so d3..d7, d16..d18 hold columns p3..q3.
    vtrn.32     d3, d7
    vtrn.32     d4, d16
    vtrn.32     d5, d17
    vtrn.32     d6, d18

    vtrn.16     d3, d5
    vtrn.16     d4, d6
    vtrn.16     d7, d17
    vtrn.16     d16, d18

    vtrn.8      d3, d4
    vtrn.8      d5, d6
    vtrn.8      d7, d16
    vtrn.8      d17, d18

    ; Store pointers are set up before the call; the helper's header states
    ; that it preserves r0-r3.
    sub         r2, r0, #3                ; r2 = s - 3 (column op2)
    add         r3, r0, #1                ; r3 = s + 1 (column oq1)

    bl          vpx_mbloop_filter_neon

    ;store op2, op1, op0, oq0 (one 4-byte row fragment per lane)
    vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [r2], r1
    vst4.8      {d0[1], d1[1], d2[1], d3[1]}, [r2], r1
    vst4.8      {d0[2], d1[2], d2[2], d3[2]}, [r2], r1
    vst4.8      {d0[3], d1[3], d2[3], d3[3]}, [r2], r1
    vst4.8      {d0[4], d1[4], d2[4], d3[4]}, [r2], r1
    vst4.8      {d0[5], d1[5], d2[5], d3[5]}, [r2], r1
    vst4.8      {d0[6], d1[6], d2[6], d3[6]}, [r2], r1
    vst4.8      {d0[7], d1[7], d2[7], d3[7]}, [r2]

    ;store oq1, oq2 (one 2-byte row fragment per lane)
    vst2.8      {d4[0], d5[0]}, [r3], r1
    vst2.8      {d4[1], d5[1]}, [r3], r1
    vst2.8      {d4[2], d5[2]}, [r3], r1
    vst2.8      {d4[3], d5[3]}, [r3], r1
    vst2.8      {d4[4], d5[4]}, [r3], r1
    vst2.8      {d4[5], d5[5]}, [r3], r1
    vst2.8      {d4[6], d5[6]}, [r3], r1
    vst2.8      {d4[7], d5[7]}, [r3]

    pop         {r4-r5, pc}
    ENDP        ; |vpx_lpf_vertical_8_neon|

;void vpx_lpf_vertical_8_dual_neon(uint8_t *s,
;                                  int pitch,
;                                  const uint8_t *blimit0,
;                                  const uint8_t *limit0,
;                                  const uint8_t *thresh0,
;                                  const uint8_t *blimit1,
;                                  const uint8_t *limit1,
;                                  const uint8_t *thresh1)
;
; Runs vpx_lpf_vertical_8_neon twice: once at s with the "0" parameters and
; once at s + 8 * pitch with the "1" parameters. The second run is a tail
; call.
;
; r0      uint8_t *s,
; r1      int pitch
; r2      const uint8_t *blimit0,
; r3      const uint8_t *limit0,
; sp      const uint8_t *thresh0,
; sp + 4  const uint8_t *blimit1,
; sp + 8  const uint8_t *limit1,
; sp + 12 const uint8_t *thresh1,
|vpx_lpf_vertical_8_dual_neon| PROC
    ; Save s and pitch for the second pass, and lr because of the bl below.
    push        {r0-r1, lr}
    ; The callee expects thresh on top of the stack at entry, so copy thresh0
    ; (now at sp + 12 after the push above) to a fresh slot.
    ldr         lr, [sp, #12]
    push        {lr}                       ; thresh0
    bl          vpx_lpf_vertical_8_neon

    ; Offsets below include the 16 bytes pushed by this function.
    ldr         r2, [sp, #20]              ; blimit1
    ldr         r3, [sp, #24]              ; limit1
    ldr         lr, [sp, #28]
    ; Overwrite the incoming thresh0 slot so that, once sp is restored, the
    ; top of the stack holds thresh1 for the tail call.
    str         lr, [sp, #16]              ; thresh1
    add         sp, #4                     ; drop the thresh0 copy
    pop         {r0-r1, lr}
    add         r0, r1, lsl #3             ; s + 8 * pitch
    b           vpx_lpf_vertical_8_neon
    ENDP        ; |vpx_lpf_vertical_8_dual_neon|

; void vpx_mbloop_filter_neon();
; This is a helper function for the loopfilters. The individual functions do the
; necessary load, transpose (if necessary) and store. The function does not use
; registers d8-d15.
;
; Inputs:
; r0-r3, r12 PRESERVE
; d0    blimit
; d1    limit
; d2    thresh
; d3    p3
; d4    p2
; d5    p1
; d6    p0
; d7    q0
; d16   q1
; d17   q2
; d18   q3
;
2277ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian; Outputs: 2287ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian; d0 op2 2297ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian; d1 op1 2307ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian; d2 op0 2317ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian; d3 oq0 2327ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian; d4 oq1 2337ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian; d5 oq2 2347ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian|vpx_mbloop_filter_neon| PROC 2357ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; filter_mask 2367ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2) 2377ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1) 2387ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vabd.u8 d21, d5, d6 ; m3 = abs(p1 - p0) 2397ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vabd.u8 d22, d16, d7 ; m4 = abs(q1 - q0) 2407ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vabd.u8 d23, d17, d16 ; m5 = abs(q2 - q1) 2417ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vabd.u8 d24, d18, d17 ; m6 = abs(q3 - q2) 2427ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2437ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; only compare the largest value to limit 2447ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmax.u8 d19, d19, d20 ; m1 = max(m1, m2) 2457ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmax.u8 d20, d21, d22 ; m2 = max(m3, m4) 2467ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2477ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vabd.u8 d25, d6, d4 ; m7 = abs(p0 - p2) 
2487ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2497ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmax.u8 d23, d23, d24 ; m3 = max(m5, m6) 2507ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2517ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vabd.u8 d26, d7, d17 ; m8 = abs(q0 - q2) 2527ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2537ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmax.u8 d19, d19, d20 2547ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2557ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vabd.u8 d24, d6, d7 ; m9 = abs(p0 - q0) 2567ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vabd.u8 d27, d3, d6 ; m10 = abs(p3 - p0) 2577ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vabd.u8 d28, d18, d7 ; m11 = abs(q3 - q0) 2587ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2597ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmax.u8 d19, d19, d23 2607ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2617ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vabd.u8 d23, d5, d16 ; a = abs(p1 - q1) 2627ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqadd.u8 d24, d24, d24 ; b = abs(p0 - q0) * 2 2637ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2647ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; abs () > limit 2657ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vcge.u8 d19, d1, d19 2667ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2677ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; only compare the largest value to thresh 2687ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmax.u8 d25, d25, d26 ; m4 = max(m7, m8) 2697ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 
vmax.u8 d26, d27, d28 ; m5 = max(m10, m11) 2707ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2717ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vshr.u8 d23, d23, #1 ; a = a / 2 2727ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2737ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmax.u8 d25, d25, d26 ; m4 = max(m4, m5) 2747ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2757ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqadd.u8 d24, d24, d23 ; a = b + a 2767ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2777ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmax.u8 d20, d20, d25 ; m2 = max(m2, m4) 2787ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2797ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmov.u8 d23, #1 2807ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vcge.u8 d24, d0, d24 ; a > blimit 2817ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2827ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vcgt.u8 d21, d21, d2 ; (abs(p1 - p0) > thresh)*-1 2837ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2847ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vcge.u8 d20, d23, d20 ; flat 2857ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2867ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vand d19, d19, d24 ; mask 2877ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2887ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vcgt.u8 d23, d22, d2 ; (abs(q1 - q0) > thresh)*-1 2897ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2907ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vand d20, d20, d19 ; flat & mask 2917ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 
2927ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmov.u8 d22, #0x80 2937ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2947ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vorr d23, d21, d23 ; hev 2957ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 2967ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; This instruction will truncate the "flat & mask" masks down to 4 bits 2977ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; each to fit into one 32 bit arm register. The values are stored in 2987ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; q10.64[0]. 2997ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vshrn.u16 d30, q10, #4 3007ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmov.u32 r4, d30[0] ; flat & mask 4bits 3017ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3027ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian adds r5, r4, #1 ; Check for all 1's 3037ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3047ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; If mask and flat are 1's for all vectors, then we only need to execute 3057ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; the power branch for all vectors. 
3067ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian beq power_branch_only 3077ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3087ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian cmp r4, #0 ; Check for 0, set flag for later 3097ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3107ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; mbfilter() function 3117ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; filter() function 3127ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; convert to signed 3137ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian veor d21, d7, d22 ; qs0 3147ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian veor d24, d6, d22 ; ps0 3157ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian veor d25, d5, d22 ; ps1 3167ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian veor d26, d16, d22 ; qs1 3177ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3187ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmov.u8 d27, #3 3197ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3207ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsub.s8 d28, d21, d24 ; ( qs0 - ps0) 3217ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3227ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1) 3237ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3247ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0) 3257ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3267ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vand d29, d29, d23 ; filter &= hev 3277ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3287ce0a1d1337c01056ba24006efab21f00e179e04Vignesh 
Venkatasubramanian vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0) 3297ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3307ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmov.u8 d29, #4 3317ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3327ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; filter = clamp(filter + 3 * ( qs0 - ps0)) 3337ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqmovn.s16 d28, q15 3347ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3357ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vand d28, d28, d19 ; filter &= mask 3367ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3377ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqadd.s8 d30, d28, d27 ; filter2 = clamp(filter+3) 3387ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqadd.s8 d29, d28, d29 ; filter1 = clamp(filter+4) 3397ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vshr.s8 d30, d30, #3 ; filter2 >>= 3 3407ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vshr.s8 d29, d29, #3 ; filter1 >>= 3 3417ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3427ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2) 3437ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqsub.s8 d21, d21, d29 ; oq0 = clamp(qs0 - filter1) 3447ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3457ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; outer tap adjustments: ++filter1 >> 1 3467ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vrshr.s8 d29, d29, #1 3477ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vbic d29, d29, d23 ; filter &= ~hev 3487ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 
3497ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter) 3507ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter) 3517ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3527ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; If mask and flat are 0's for all vectors, then we only need to execute 3537ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; the filter branch for all vectors. 3547ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian beq filter_branch_only 3557ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3567ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; If mask and flat are mixed then we must perform both branches and 3577ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; combine the data. 3587ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian veor d24, d24, d22 ; *f_op0 = u^0x80 3597ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian veor d21, d21, d22 ; *f_oq0 = u^0x80 3607ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian veor d25, d25, d22 ; *f_op1 = u^0x80 3617ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian veor d26, d26, d22 ; *f_oq1 = u^0x80 3627ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3637ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; At this point we have already executed the filter branch. The filter 3647ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; branch does not set op2 or oq2, so use p2 and q2. Execute the power 3657ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; branch and combine the data. 
3667ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmov.u8 d23, #2 3677ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddl.u8 q14, d6, d7 ; r_op2 = p0 + q0 3687ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmlal.u8 q14, d3, d27 ; r_op2 += p3 * 3 3697ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmlal.u8 q14, d4, d23 ; r_op2 += p2 * 2 3707ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3717ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vbif d0, d4, d20 ; op2 |= p2 & ~(flat & mask) 3727ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3737ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d5 ; r_op2 += p1 3747ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3757ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vbif d1, d25, d20 ; op1 |= f_op1 & ~(flat & mask) 3767ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3777ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqrshrn.u16 d30, q14, #3 ; r_op2 3787ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3797ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d3 ; r_op1 = r_op2 - p3 3807ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d4 ; r_op1 -= p2 3817ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d5 ; r_op1 += p1 3827ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d16 ; r_op1 += q1 3837ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3847ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vbif d2, d24, d20 ; op0 |= f_op0 & ~(flat & mask) 3857ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3867ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqrshrn.u16 d31, q14, #3 ; r_op1 
3877ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3887ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d3 ; r_op0 = r_op1 - p3 3897ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d5 ; r_op0 -= p1 3907ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d6 ; r_op0 += p0 3917ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d17 ; r_op0 += q2 3927ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3937ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vbit d0, d30, d20 ; op2 |= r_op2 & (flat & mask) 3947ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3957ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqrshrn.u16 d23, q14, #3 ; r_op0 3967ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 3977ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d3 ; r_oq0 = r_op0 - p3 3987ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d6 ; r_oq0 -= p0 3997ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d7 ; r_oq0 += q0 4007ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4017ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vbit d1, d31, d20 ; op1 |= r_op1 & (flat & mask) 4027ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4037ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d18 ; oq0 += q3 4047ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4057ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vbit d2, d23, d20 ; op0 |= r_op0 & (flat & mask) 4067ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4077ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqrshrn.u16 d22, q14, #3 ; r_oq0 4087ce0a1d1337c01056ba24006efab21f00e179e04Vignesh 
Venkatasubramanian 4097ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d4 ; r_oq1 = r_oq0 - p2 4107ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d7 ; r_oq1 -= q0 4117ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d16 ; r_oq1 += q1 4127ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4137ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vbif d3, d21, d20 ; oq0 |= f_oq0 & ~(flat & mask) 4147ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4157ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d18 ; r_oq1 += q3 4167ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4177ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vbif d4, d26, d20 ; oq1 |= f_oq1 & ~(flat & mask) 4187ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4197ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqrshrn.u16 d6, q14, #3 ; r_oq1 4207ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4217ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d5 ; r_oq2 = r_oq1 - p1 4227ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d16 ; r_oq2 -= q1 4237ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d17 ; r_oq2 += q2 4247ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d18 ; r_oq2 += q3 4257ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4267ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vbif d5, d17, d20 ; oq2 |= q2 & ~(flat & mask) 4277ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4287ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqrshrn.u16 d7, q14, #3 ; r_oq2 4297ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 
4307ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vbit d3, d22, d20 ; oq0 |= r_oq0 & (flat & mask) 4317ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vbit d4, d6, d20 ; oq1 |= r_oq1 & (flat & mask) 4327ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vbit d5, d7, d20 ; oq2 |= r_oq2 & (flat & mask) 4337ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4347ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian bx lr 4357ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4367ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanianpower_branch_only 4377ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmov.u8 d27, #3 4387ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmov.u8 d21, #2 4397ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddl.u8 q14, d6, d7 ; op2 = p0 + q0 4407ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmlal.u8 q14, d3, d27 ; op2 += p3 * 3 4417ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vmlal.u8 q14, d4, d21 ; op2 += p2 * 2 4427ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d5 ; op2 += p1 4437ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqrshrn.u16 d0, q14, #3 ; op2 4447ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4457ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d3 ; op1 = op2 - p3 4467ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d4 ; op1 -= p2 4477ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d5 ; op1 += p1 4487ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d16 ; op1 += q1 4497ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqrshrn.u16 d1, q14, #3 ; op1 4507ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 
4517ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d3 ; op0 = op1 - p3 4527ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d5 ; op0 -= p1 4537ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d6 ; op0 += p0 4547ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d17 ; op0 += q2 4557ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqrshrn.u16 d2, q14, #3 ; op0 4567ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4577ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d3 ; oq0 = op0 - p3 4587ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d6 ; oq0 -= p0 4597ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d7 ; oq0 += q0 4607ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d18 ; oq0 += q3 4617ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqrshrn.u16 d3, q14, #3 ; oq0 4627ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4637ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d4 ; oq1 = oq0 - p2 4647ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d7 ; oq1 -= q0 4657ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d16 ; oq1 += q1 4667ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d18 ; oq1 += q3 4677ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqrshrn.u16 d4, q14, #3 ; oq1 4687ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4697ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d5 ; oq2 = oq1 - p1 4707ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vsubw.u8 q14, d16 ; oq2 -= q1 4717ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 
q14, d17 ; oq2 += q2 4727ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vaddw.u8 q14, d18 ; oq2 += q3 4737ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vqrshrn.u16 d5, q14, #3 ; oq2 4747ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4757ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian bx lr 4767ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4777ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanianfilter_branch_only 4787ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; TODO(fgalligan): See if we can rearange registers so we do not need to 4797ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ; do the 2 vswp. 4807ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vswp d0, d4 ; op2 4817ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian vswp d5, d17 ; oq2 4827ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian veor d2, d24, d22 ; *op0 = u^0x80 4837ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian veor d3, d21, d22 ; *oq0 = u^0x80 4847ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian veor d1, d25, d22 ; *op1 = u^0x80 4857ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian veor d4, d26, d22 ; *oq1 = u^0x80 4867ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4877ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian bx lr 4887ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4897ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian ENDP ; |vpx_mbloop_filter_neon| 4907ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian 4917ce0a1d1337c01056ba24006efab21f00e179e04Vignesh Venkatasubramanian END 492