;
;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS. All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

    EXPORT  |vpx_lpf_horizontal_16_neon|
    EXPORT  |vpx_lpf_horizontal_16_dual_neon|
    EXPORT  |vpx_lpf_vertical_16_neon|
    EXPORT  |vpx_lpf_vertical_16_dual_neon|
    ARM

    AREA ||.text||, CODE, READONLY, ALIGN=2

; void mb_lpf_horizontal_edge(uint8_t *s, int p,
;                             const uint8_t *blimit,
;                             const uint8_t *limit,
;                             const uint8_t *thresh,
;                             int count)
;
; Shared body of the two horizontal 16-wide entry points. It is not exported
; and is only reached by a plain branch from the wrappers below, which place
; the iteration count in r12. Each iteration filters one group of 8 pixels
; along the edge (16 rows: p7..p0 above s, q0..q7 from s downwards) and then
; advances s by 8 bytes.
;
; r0    uint8_t *s,
; r1    int p, /* pitch */
; r2    const uint8_t *blimit,
; r3    const uint8_t *limit,
; sp    const uint8_t *thresh,
; r12   int count
;
; After the call to vpx_wide_mbfilter_neon, r7 selects which results are
; written back:
;   bit 0 set -> store the 4 taps produced by filter()
;   bit 1 set -> store the 6 taps produced by mbfilter()
;   neither   -> store the 14 taps produced by the wide filter
|mb_lpf_horizontal_edge| PROC
    push        {r4-r8, lr}                ; 6 words  = 24 bytes
    vpush       {d8-d15}                   ; 8 dwords = 64 bytes
    ldr         r4, [sp, #88]              ; load thresh (first stack
                                           ; argument, 24 + 64 bytes up)

h_count
    ; d16-d18 are overwritten by the helper, so the three thresholds are
    ; reloaded (one byte replicated to every lane) on each iteration.
    vld1.8      {d16[]}, [r2]              ; load *blimit
    vld1.8      {d17[]}, [r3]              ; load *limit
    vld1.8      {d18[]}, [r4]              ; load *thresh

    sub         r8, r0, r1, lsl #3         ; r8 = s - 8 * pitch (row of p7)

    vld1.u8     {d0}, [r8@64], r1          ; p7
    vld1.u8     {d1}, [r8@64], r1          ; p6
    vld1.u8     {d2}, [r8@64], r1          ; p5
    vld1.u8     {d3}, [r8@64], r1          ; p4
    vld1.u8     {d4}, [r8@64], r1          ; p3
    vld1.u8     {d5}, [r8@64], r1          ; p2
    vld1.u8     {d6}, [r8@64], r1          ; p1
    vld1.u8     {d7}, [r8@64], r1          ; p0
    vld1.u8     {d8}, [r8@64], r1          ; q0
    vld1.u8     {d9}, [r8@64], r1          ; q1
    vld1.u8     {d10}, [r8@64], r1         ; q2
    vld1.u8     {d11}, [r8@64], r1         ; q3
    vld1.u8     {d12}, [r8@64], r1         ; q4
    vld1.u8     {d13}, [r8@64], r1         ; q5
    vld1.u8     {d14}, [r8@64], r1         ; q6
    vld1.u8     {d15}, [r8@64], r1         ; q7

    bl          vpx_wide_mbfilter_neon

    tst         r7, #1
    beq         h_mbfilter

    ; flat && mask were not set for any of the channels. Just store the values
    ; from filter.
    sub         r8, r0, r1, lsl #1         ; r8 = s - 2 * pitch (row of p1)

    vst1.u8     {d25}, [r8@64], r1         ; store op1
    vst1.u8     {d24}, [r8@64], r1         ; store op0
    vst1.u8     {d23}, [r8@64], r1         ; store oq0
    vst1.u8     {d26}, [r8@64], r1         ; store oq1

    b           h_next

h_mbfilter
    tst         r7, #2
    beq         h_wide_mbfilter

    ; flat2 was not set for any of the channels. Just store the values from
    ; mbfilter.
    sub         r8, r0, r1, lsl #1
    sub         r8, r8, r1                 ; r8 = s - 3 * pitch (row of p2)

    vst1.u8     {d18}, [r8@64], r1         ; store op2
    vst1.u8     {d19}, [r8@64], r1         ; store op1
    vst1.u8     {d20}, [r8@64], r1         ; store op0
    vst1.u8     {d21}, [r8@64], r1         ; store oq0
    vst1.u8     {d22}, [r8@64], r1         ; store oq1
    vst1.u8     {d23}, [r8@64], r1         ; store oq2

    b           h_next

h_wide_mbfilter
    sub         r8, r0, r1, lsl #3
    add         r8, r8, r1                 ; r8 = s - 7 * pitch (row of p6)

    vst1.u8     {d16}, [r8@64], r1         ; store op6
    vst1.u8     {d24}, [r8@64], r1         ; store op5
    vst1.u8     {d25}, [r8@64], r1         ; store op4
    vst1.u8     {d26}, [r8@64], r1         ; store op3
    vst1.u8     {d27}, [r8@64], r1         ; store op2
    vst1.u8     {d18}, [r8@64], r1         ; store op1
    vst1.u8     {d19}, [r8@64], r1         ; store op0
    vst1.u8     {d20}, [r8@64], r1         ; store oq0
    vst1.u8     {d21}, [r8@64], r1         ; store oq1
    vst1.u8     {d22}, [r8@64], r1         ; store oq2
    vst1.u8     {d23}, [r8@64], r1         ; store oq3
    vst1.u8     {d1}, [r8@64], r1          ; store oq4
    vst1.u8     {d2}, [r8@64], r1          ; store oq5
    vst1.u8     {d3}, [r8@64], r1          ; store oq6

h_next
    add         r0, r0, #8                 ; next group of 8 pixels on the edge
    subs        r12, r12, #1
    bne         h_count

    vpop        {d8-d15}
    pop         {r4-r8, pc}                ; returns to the wrapper's caller

    ENDP        ; |mb_lpf_horizontal_edge|

; void vpx_lpf_horizontal_16_neon(uint8_t *s, int pitch,
;                                 const uint8_t *blimit,
;                                 const uint8_t *limit,
;                                 const uint8_t *thresh)
;
; Filters 8 pixels along a horizontal edge: sets count = 1 and branches
; (without linking, so lr still points at the C caller) into the shared body.
;
; r0    uint8_t *s,
; r1    int pitch,
; r2    const uint8_t *blimit,
; r3    const uint8_t *limit,
; sp    const uint8_t *thresh
|vpx_lpf_horizontal_16_neon| PROC
    mov         r12, #1                    ; count = 1
    b           mb_lpf_horizontal_edge
    ENDP        ; |vpx_lpf_horizontal_16_neon|

; void vpx_lpf_horizontal_16_dual_neon(uint8_t *s, int pitch,
;                                      const uint8_t *blimit,
;                                      const uint8_t *limit,
;                                      const uint8_t *thresh)
;
; Filters 16 pixels along a horizontal edge: same as above with count = 2,
; i.e. two consecutive groups of 8 pixels.
;
; r0    uint8_t *s,
; r1    int pitch,
; r2    const uint8_t *blimit,
; r3    const uint8_t *limit,
; sp    const uint8_t *thresh
|vpx_lpf_horizontal_16_dual_neon| PROC
    mov         r12, #2                    ; count = 2
    b           mb_lpf_horizontal_edge
    ENDP        ; |vpx_lpf_horizontal_16_dual_neon|

; void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
;                             const uint8_t *limit, const uint8_t *thresh,
;                             int count) {
;
; Shared body of the two vertical 16-wide entry points. It is not exported
; and is only reached by a plain branch from the wrappers below, which place
; the iteration count in r12. Each iteration loads 8 rows of 16 bytes
; (8 bytes either side of s), transposes them so that d0-d15 hold p7..q7 as
; the helper expects, filters, and writes the results back column-wise. s
; ends each iteration 8 rows further down.
;
; r0    uint8_t *s,
; r1    int p, /* pitch */
; r2    const uint8_t *blimit,
; r3    const uint8_t *limit,
; sp    const uint8_t *thresh,
; r12   int count
;
; r7 is interpreted after the helper call exactly as in
; mb_lpf_horizontal_edge (bit 0: filter() only, bit 1: mbfilter() only,
; neither: wide filter).
|mb_lpf_vertical_edge_w| PROC
    push        {r4-r8, lr}                ; 6 words  = 24 bytes
    vpush       {d8-d15}                   ; 8 dwords = 64 bytes
    ldr         r4, [sp, #88]              ; load thresh (first stack
                                           ; argument, 24 + 64 bytes up)

v_count
    ; d16-d18 are overwritten by the helper, so the three thresholds are
    ; reloaded (one byte replicated to every lane) on each iteration.
    vld1.8      {d16[]}, [r2]              ; load *blimit
    vld1.8      {d17[]}, [r3]              ; load *limit
    vld1.8      {d18[]}, [r4]              ; load *thresh

    sub         r8, r0, #8                 ; r8 = s - 8 (left half of each row)

    ; Rows 0-7: left 8 bytes into d0-d7, right 8 bytes into d8-d15.
    vld1.8      {d0}, [r8@64], r1
    vld1.8      {d8}, [r0@64], r1
    vld1.8      {d1}, [r8@64], r1
    vld1.8      {d9}, [r0@64], r1
    vld1.8      {d2}, [r8@64], r1
    vld1.8      {d10}, [r0@64], r1
    vld1.8      {d3}, [r8@64], r1
    vld1.8      {d11}, [r0@64], r1
    vld1.8      {d4}, [r8@64], r1
    vld1.8      {d12}, [r0@64], r1
    vld1.8      {d5}, [r8@64], r1
    vld1.8      {d13}, [r0@64], r1
    vld1.8      {d6}, [r8@64], r1
    vld1.8      {d14}, [r0@64], r1
    vld1.8      {d7}, [r8@64], r1
    vld1.8      {d15}, [r0@64], r1

    sub         r0, r0, r1, lsl #3         ; rewind s to the first row

    ; Transpose both 8x8 halves: afterwards d0-d7 = p7..p0, d8-d15 = q0..q7.
    vtrn.32     q0, q2
    vtrn.32     q1, q3
    vtrn.32     q4, q6
    vtrn.32     q5, q7

    vtrn.16     q0, q1
    vtrn.16     q2, q3
    vtrn.16     q4, q5
    vtrn.16     q6, q7

    vtrn.8      d0, d1
    vtrn.8      d2, d3
    vtrn.8      d4, d5
    vtrn.8      d6, d7

    vtrn.8      d8, d9
    vtrn.8      d10, d11
    vtrn.8      d12, d13
    vtrn.8      d14, d15

    bl          vpx_wide_mbfilter_neon

    tst         r7, #1
    beq         v_mbfilter

    ; flat && mask were not set for any of the channels. Just store the values
    ; from filter.
    sub         r0, #2                     ; point at the p1 column

    ; The helper leaves op1/op0/oq0/oq1 in d25/d24/d23/d26. Swapping d23 and
    ; d25 puts them in ascending register order for the interleaving store.
    vswp        d23, d25

    ; One row per store: 4 bytes op1, op0, oq0, oq1.
    vst4.8      {d23[0], d24[0], d25[0], d26[0]}, [r0], r1
    vst4.8      {d23[1], d24[1], d25[1], d26[1]}, [r0], r1
    vst4.8      {d23[2], d24[2], d25[2], d26[2]}, [r0], r1
    vst4.8      {d23[3], d24[3], d25[3], d26[3]}, [r0], r1
    vst4.8      {d23[4], d24[4], d25[4], d26[4]}, [r0], r1
    vst4.8      {d23[5], d24[5], d25[5], d26[5]}, [r0], r1
    vst4.8      {d23[6], d24[6], d25[6], d26[6]}, [r0], r1
    vst4.8      {d23[7], d24[7], d25[7], d26[7]}, [r0], r1
    add         r0, #2                     ; back onto the edge column,
                                           ; now 8 rows further down

    b           v_next

v_mbfilter
    tst         r7, #2
    beq         v_wide_mbfilter

    ; flat2 was not set for any of the channels. Just store the values from
    ; mbfilter.
    sub         r8, r0, #3                 ; r8 = s - 3 (p2 column)

    ; One row per pair of stores: op2, op1, op0 at s - 3 and oq0, oq1, oq2
    ; at s.
    vst3.8      {d18[0], d19[0], d20[0]}, [r8], r1
    vst3.8      {d21[0], d22[0], d23[0]}, [r0], r1
    vst3.8      {d18[1], d19[1], d20[1]}, [r8], r1
    vst3.8      {d21[1], d22[1], d23[1]}, [r0], r1
    vst3.8      {d18[2], d19[2], d20[2]}, [r8], r1
    vst3.8      {d21[2], d22[2], d23[2]}, [r0], r1
    vst3.8      {d18[3], d19[3], d20[3]}, [r8], r1
    vst3.8      {d21[3], d22[3], d23[3]}, [r0], r1
    vst3.8      {d18[4], d19[4], d20[4]}, [r8], r1
    vst3.8      {d21[4], d22[4], d23[4]}, [r0], r1
    vst3.8      {d18[5], d19[5], d20[5]}, [r8], r1
    vst3.8      {d21[5], d22[5], d23[5]}, [r0], r1
    vst3.8      {d18[6], d19[6], d20[6]}, [r8], r1
    vst3.8      {d21[6], d22[6], d23[6]}, [r0], r1
    vst3.8      {d18[7], d19[7], d20[7]}, [r8], r1
    vst3.8      {d21[7], d22[7], d23[7]}, [r0], r1

    b           v_next

v_wide_mbfilter
    sub         r8, r0, #8                 ; r8 = s - 8 (left half of each row)

    ; Transpose the left half back into rows. Column order is
    ; d0, d16, d24, d25, d26, d27, d18, d19 = p7, op6, op5, ..., op0.
    ; NOTE(review): relies on the helper leaving p7 untouched in d0.
    vtrn.32     d0, d26
    vtrn.32     d16, d27
    vtrn.32     d24, d18
    vtrn.32     d25, d19

    vtrn.16     d0, d24
    vtrn.16     d16, d25
    vtrn.16     d26, d18
    vtrn.16     d27, d19

    vtrn.8      d0, d16
    vtrn.8      d24, d25
    vtrn.8      d26, d27
    vtrn.8      d18, d19

    ; Transpose the right half back into rows. Column order is
    ; d20, d21, d22, d23, d1, d2, d3, d15 = oq0, ..., oq6, q7.
    ; NOTE(review): relies on the helper leaving q7 untouched in d15.
    vtrn.32     d20, d1
    vtrn.32     d21, d2
    vtrn.32     d22, d3
    vtrn.32     d23, d15

    vtrn.16     d20, d22
    vtrn.16     d21, d23
    vtrn.16     d1, d3
    vtrn.16     d2, d15

    vtrn.8      d20, d21
    vtrn.8      d22, d23
    vtrn.8      d1, d2
    vtrn.8      d3, d15

    ; Rows 0-7: left 8 bytes at s - 8, right 8 bytes at s.
    vst1.8      {d0}, [r8@64], r1
    vst1.8      {d20}, [r0@64], r1
    vst1.8      {d16}, [r8@64], r1
    vst1.8      {d21}, [r0@64], r1
    vst1.8      {d24}, [r8@64], r1
    vst1.8      {d22}, [r0@64], r1
    vst1.8      {d25}, [r8@64], r1
    vst1.8      {d23}, [r0@64], r1
    vst1.8      {d26}, [r8@64], r1
    vst1.8      {d1}, [r0@64], r1
    vst1.8      {d27}, [r8@64], r1
    vst1.8      {d2}, [r0@64], r1
    vst1.8      {d18}, [r8@64], r1
    vst1.8      {d3}, [r0@64], r1
    vst1.8      {d19}, [r8@64], r1
    vst1.8      {d15}, [r0@64], r1

v_next
    ; Every store path above leaves r0 at s + 8 * pitch, i.e. on the next
    ; group of 8 rows.
    subs        r12, #1
    bne         v_count

    vpop        {d8-d15}
    pop         {r4-r8, pc}                ; returns to the wrapper's caller

    ENDP        ; |mb_lpf_vertical_edge_w|

; void vpx_lpf_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
;                               const uint8_t *limit, const uint8_t *thresh)
;
; Filters 8 rows across a vertical edge: sets count = 1 and branches
; (without linking, so lr still points at the C caller) into the shared body.
;
; r0    uint8_t *s,
; r1    int p, /* pitch */
; r2    const uint8_t *blimit,
; r3    const uint8_t *limit,
; sp    const uint8_t *thresh
|vpx_lpf_vertical_16_neon| PROC
    mov         r12, #1                    ; count = 1
    b           mb_lpf_vertical_edge_w
    ENDP        ; |vpx_lpf_vertical_16_neon|

; void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
;                                    const uint8_t *limit,
;                                    const uint8_t *thresh)
;
; Filters 16 rows across a vertical edge: same as above with count = 2,
; i.e. two consecutive groups of 8 rows.
;
; r0    uint8_t *s,
; r1    int p, /* pitch */
; r2    const uint8_t *blimit,
; r3    const uint8_t *limit,
; sp    const uint8_t *thresh
|vpx_lpf_vertical_16_dual_neon| PROC
    mov         r12, #2                    ; count = 2
    b           mb_lpf_vertical_edge_w
    ENDP        ; |vpx_lpf_vertical_16_dual_neon|

; void vpx_wide_mbfilter_neon();
; This is a helper function for the loopfilters. The individual functions do the
; necessary load, transpose (if necessary) and store.
347b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian; 3487bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r0-r3 PRESERVE 3497bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d16 blimit 3507bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d17 limit 3517bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d18 thresh 3527bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d0 p7 3537bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d1 p6 3547bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d2 p5 3557bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d3 p4 3567bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d4 p3 3577bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d5 p2 3587bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d6 p1 3597bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d7 p0 3607bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d8 q0 3617bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d9 q1 3627bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d10 q2 3637bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d11 q3 3647bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d12 q4 3657bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d13 q5 3667bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d14 q6 3677bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d15 q7 3687bc9febe8749e98a3812a0dc4380ceae75c29450Johann|vpx_wide_mbfilter_neon| PROC 3697bc9febe8749e98a3812a0dc4380ceae75c29450Johann mov r7, #0 370b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 371b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian ; filter_mask 3727bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d19, d4, d5 ; abs(p3 - p2) 3737bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d20, d5, d6 ; abs(p2 - p1) 3747bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d21, d6, d7 ; abs(p1 - p0) 3757bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d22, d9, d8 ; abs(q1 - q0) 3767bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d23, d10, d9 ; abs(q2 - q1) 
3777bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d24, d11, d10 ; abs(q3 - q2) 378b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 379b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian ; only compare the largest value to limit 3807bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d19, d19, d20 ; max(abs(p3 - p2), abs(p2 - p1)) 3817bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d20, d21, d22 ; max(abs(p1 - p0), abs(q1 - q0)) 3827bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d23, d23, d24 ; max(abs(q2 - q1), abs(q3 - q2)) 3837bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d19, d19, d20 384b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 3857bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d24, d7, d8 ; abs(p0 - q0) 386b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 3877bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d19, d19, d23 388b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 3897bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d23, d6, d9 ; a = abs(p1 - q1) 3907bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqadd.u8 d24, d24, d24 ; b = abs(p0 - q0) * 2 391b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 3927bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; abs () > limit 3937bc9febe8749e98a3812a0dc4380ceae75c29450Johann vcge.u8 d19, d17, d19 394b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 3957bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; flatmask4 3967bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d25, d7, d5 ; abs(p0 - p2) 3977bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d26, d8, d10 ; abs(q0 - q2) 3987bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d27, d4, d7 ; abs(p3 - p0) 3997bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d28, d11, d8 ; abs(q3 - q0) 400b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 
4017bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; only compare the largest value to thresh 4027bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d25, d25, d26 ; max(abs(p0 - p2), abs(q0 - q2)) 4037bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d26, d27, d28 ; max(abs(p3 - p0), abs(q3 - q0)) 4047bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d25, d25, d26 4057bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d20, d20, d25 406b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4077bc9febe8749e98a3812a0dc4380ceae75c29450Johann vshr.u8 d23, d23, #1 ; a = a / 2 4087bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqadd.u8 d24, d24, d23 ; a = b + a 409b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4107bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmov.u8 d30, #1 4117bc9febe8749e98a3812a0dc4380ceae75c29450Johann vcge.u8 d24, d16, d24 ; (a > blimit * 2 + limit) * -1 412b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4137bc9febe8749e98a3812a0dc4380ceae75c29450Johann vcge.u8 d20, d30, d20 ; flat 414b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4157bc9febe8749e98a3812a0dc4380ceae75c29450Johann vand d19, d19, d24 ; mask 416b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4177bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; hevmask 4187bc9febe8749e98a3812a0dc4380ceae75c29450Johann vcgt.u8 d21, d21, d18 ; (abs(p1 - p0) > thresh)*-1 4197bc9febe8749e98a3812a0dc4380ceae75c29450Johann vcgt.u8 d22, d22, d18 ; (abs(q1 - q0) > thresh)*-1 4207bc9febe8749e98a3812a0dc4380ceae75c29450Johann vorr d21, d21, d22 ; hev 421b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4227bc9febe8749e98a3812a0dc4380ceae75c29450Johann vand d16, d20, d19 ; flat && mask 4237bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmov r5, r6, d16 424b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4257bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; flatmask5(1, p7, p6, p5, 
p4, p0, q0, q4, q5, q6, q7) 4267bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d22, d3, d7 ; abs(p4 - p0) 4277bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d23, d12, d8 ; abs(q4 - q0) 4287bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d24, d7, d2 ; abs(p0 - p5) 4297bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d25, d8, d13 ; abs(q0 - q5) 4307bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d26, d1, d7 ; abs(p6 - p0) 4317bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d27, d14, d8 ; abs(q6 - q0) 4327bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d28, d0, d7 ; abs(p7 - p0) 4337bc9febe8749e98a3812a0dc4380ceae75c29450Johann vabd.u8 d29, d15, d8 ; abs(q7 - q0) 434b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4357bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; only compare the largest value to thresh 4367bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d22, d22, d23 ; max(abs(p4 - p0), abs(q4 - q0)) 4377bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d23, d24, d25 ; max(abs(p0 - p5), abs(q0 - q5)) 4387bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d24, d26, d27 ; max(abs(p6 - p0), abs(q6 - q0)) 4397bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d25, d28, d29 ; max(abs(p7 - p0), abs(q7 - q0)) 440b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4417bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d26, d22, d23 4427bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d27, d24, d25 4437bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmax.u8 d23, d26, d27 444b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4457bc9febe8749e98a3812a0dc4380ceae75c29450Johann vcge.u8 d18, d30, d23 ; flat2 446b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4477bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmov.u8 d22, #0x80 448b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4497bc9febe8749e98a3812a0dc4380ceae75c29450Johann orrs 
r5, r5, r6 ; Check for 0 4507bc9febe8749e98a3812a0dc4380ceae75c29450Johann orreq r7, r7, #1 ; Only do filter branch 451b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4527bc9febe8749e98a3812a0dc4380ceae75c29450Johann vand d17, d18, d16 ; flat2 && flat && mask 4537bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmov r5, r6, d17 454b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4557bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; mbfilter() function 456b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4577bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; filter() function 4587bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; convert to signed 4597bc9febe8749e98a3812a0dc4380ceae75c29450Johann veor d23, d8, d22 ; qs0 4607bc9febe8749e98a3812a0dc4380ceae75c29450Johann veor d24, d7, d22 ; ps0 4617bc9febe8749e98a3812a0dc4380ceae75c29450Johann veor d25, d6, d22 ; ps1 4627bc9febe8749e98a3812a0dc4380ceae75c29450Johann veor d26, d9, d22 ; qs1 463b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4647bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmov.u8 d27, #3 465b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4667bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.s8 d28, d23, d24 ; ( qs0 - ps0) 4677bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1) 4687bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0) 4697bc9febe8749e98a3812a0dc4380ceae75c29450Johann vand d29, d29, d21 ; filter &= hev 4707bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0) 4717bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmov.u8 d29, #4 472b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 4737bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; filter = clamp(filter + 3 * ( qs0 - ps0)) 4747bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqmovn.s16 d28, q15 
4757bc9febe8749e98a3812a0dc4380ceae75c29450Johann 4767bc9febe8749e98a3812a0dc4380ceae75c29450Johann vand d28, d28, d19 ; filter &= mask 4777bc9febe8749e98a3812a0dc4380ceae75c29450Johann 4787bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqadd.s8 d30, d28, d27 ; filter2 = clamp(filter+3) 4797bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqadd.s8 d29, d28, d29 ; filter1 = clamp(filter+4) 4807bc9febe8749e98a3812a0dc4380ceae75c29450Johann vshr.s8 d30, d30, #3 ; filter2 >>= 3 4817bc9febe8749e98a3812a0dc4380ceae75c29450Johann vshr.s8 d29, d29, #3 ; filter1 >>= 3 4827bc9febe8749e98a3812a0dc4380ceae75c29450Johann 4837bc9febe8749e98a3812a0dc4380ceae75c29450Johann 4847bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2) 4857bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqsub.s8 d23, d23, d29 ; oq0 = clamp(qs0 - filter1) 4867bc9febe8749e98a3812a0dc4380ceae75c29450Johann 4877bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; outer tap adjustments: ++filter1 >> 1 4887bc9febe8749e98a3812a0dc4380ceae75c29450Johann vrshr.s8 d29, d29, #1 4897bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbic d29, d29, d21 ; filter &= ~hev 4907bc9febe8749e98a3812a0dc4380ceae75c29450Johann 4917bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter) 4927bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter) 4937bc9febe8749e98a3812a0dc4380ceae75c29450Johann 4947bc9febe8749e98a3812a0dc4380ceae75c29450Johann veor d24, d24, d22 ; *f_op0 = u^0x80 4957bc9febe8749e98a3812a0dc4380ceae75c29450Johann veor d23, d23, d22 ; *f_oq0 = u^0x80 4967bc9febe8749e98a3812a0dc4380ceae75c29450Johann veor d25, d25, d22 ; *f_op1 = u^0x80 4977bc9febe8749e98a3812a0dc4380ceae75c29450Johann veor d26, d26, d22 ; *f_oq1 = u^0x80 4987bc9febe8749e98a3812a0dc4380ceae75c29450Johann 4997bc9febe8749e98a3812a0dc4380ceae75c29450Johann tst r7, #1 5007bc9febe8749e98a3812a0dc4380ceae75c29450Johann bxne lr 
5017bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5027bc9febe8749e98a3812a0dc4380ceae75c29450Johann orrs r5, r5, r6 ; Check for 0 5037bc9febe8749e98a3812a0dc4380ceae75c29450Johann orreq r7, r7, #2 ; Only do mbfilter branch 5047bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5057bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; mbfilter flat && mask branch 5067bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; TODO(fgalligan): Can I decrease the cycles shifting to consective d's 5077bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; and using vibt on the q's? 5087bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmov.u8 d29, #2 5097bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q15, d7, d8 ; op2 = p0 + q0 5107bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmlal.u8 q15, d4, d27 ; op2 = p0 + q0 + p3 * 3 5117bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmlal.u8 q15, d5, d29 ; op2 = p0 + q0 + p3 * 3 + p2 * 2 5127bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q10, d4, d5 5137bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddw.u8 q15, d6 ; op2=p1 + p0 + q0 + p3 * 3 + p2 *2 5147bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d6, d9 5157bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d18, q15, #3 ; r_op2 5167bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5177bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.i16 q15, q10 5187bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q10, d4, d6 5197bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q15, q14 5207bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d7, d10 5217bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d19, q15, #3 ; r_op1 5227bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5237bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.i16 q15, q10 5247bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q15, q14 5257bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d8, d11 5267bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d20, q15, #3 ; r_op0 
5277bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5287bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsubw.u8 q15, d4 ; oq0 = op0 - p3 5297bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsubw.u8 q15, d7 ; oq0 -= p0 5307bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q15, q14 5317bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d9, d11 5327bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d21, q15, #3 ; r_oq0 5337bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5347bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsubw.u8 q15, d5 ; oq1 = oq0 - p2 5357bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsubw.u8 q15, d8 ; oq1 -= q0 5367bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q15, q14 5377bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d10, d11 5387bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d22, q15, #3 ; r_oq1 5397bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5407bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsubw.u8 q15, d6 ; oq2 = oq0 - p1 5417bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsubw.u8 q15, d9 ; oq2 -= q1 5427bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q15, q14 5437bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d27, q15, #3 ; r_oq2 5447bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5457bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; Filter does not set op2 or oq2, so use p2 and q2. 
5467bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d18, d5, d16 ; t_op2 |= p2 & ~(flat & mask) 5477bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d19, d25, d16 ; t_op1 |= f_op1 & ~(flat & mask) 5487bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d20, d24, d16 ; t_op0 |= f_op0 & ~(flat & mask) 5497bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d21, d23, d16 ; t_oq0 |= f_oq0 & ~(flat & mask) 5507bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d22, d26, d16 ; t_oq1 |= f_oq1 & ~(flat & mask) 5517bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5527bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbit d23, d27, d16 ; t_oq2 |= r_oq2 & (flat & mask) 5537bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d23, d10, d16 ; t_oq2 |= q2 & ~(flat & mask) 5547bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5557bc9febe8749e98a3812a0dc4380ceae75c29450Johann tst r7, #2 5567bc9febe8749e98a3812a0dc4380ceae75c29450Johann bxne lr 5577bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5587bc9febe8749e98a3812a0dc4380ceae75c29450Johann ; wide_mbfilter flat2 && flat && mask branch 5597bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmov.u8 d16, #7 5607bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q15, d7, d8 ; op6 = p0 + q0 5617bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q12, d2, d3 5627bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q13, d4, d5 5637bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d1, d6 5647bc9febe8749e98a3812a0dc4380ceae75c29450Johann vmlal.u8 q15, d0, d16 ; op6 += p7 * 3 5657bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q12, q13 5667bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q15, q14 5677bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d2, d9 5687bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q15, q12 5697bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q12, d0, d1 5707bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddw.u8 q15, d1 5717bc9febe8749e98a3812a0dc4380ceae75c29450Johann 
vaddl.u8 q13, d0, d2 5727bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q14, q15, q14 5737bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d16, q15, #4 ; w_op6 5747bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5757bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.i16 q15, q14, q12 5767bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d3, d10 5777bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d24, q15, #4 ; w_op5 5787bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5797bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.i16 q15, q13 5807bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q13, d0, d3 5817bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q15, q14 5827bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d4, d11 5837bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d25, q15, #4 ; w_op4 5847bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5857bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q15, q14 5867bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d0, d4 5877bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.i16 q15, q13 5887bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.i16 q14, q15, q14 5897bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d26, q15, #4 ; w_op3 5907bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5917bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddw.u8 q15, q14, d5 ; op2 += p2 5927bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d0, d5 5937bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddw.u8 q15, d12 ; op2 += q4 5947bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d26, d4, d17 ; op3 |= p3 & ~(f2 & f & m) 5957bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d27, q15, #4 ; w_op2 5967bc9febe8749e98a3812a0dc4380ceae75c29450Johann 5977bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.i16 q15, q14 5987bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d0, d6 5997bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddw.u8 q15, d6 
; op1 += p1 6007bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddw.u8 q15, d13 ; op1 += q5 6017bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d27, d18, d17 ; op2 |= t_op2 & ~(f2 & f & m) 6027bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d18, q15, #4 ; w_op1 6037bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6047bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.i16 q15, q14 6057bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d0, d7 6067bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddw.u8 q15, d7 ; op0 += p0 6077bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddw.u8 q15, d14 ; op0 += q6 6087bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d18, d19, d17 ; op1 |= t_op1 & ~(f2 & f & m) 6097bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d19, q15, #4 ; w_op0 6107bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6117bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.i16 q15, q14 6127bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d1, d8 6137bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddw.u8 q15, d8 ; oq0 += q0 6147bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddw.u8 q15, d15 ; oq0 += q7 6157bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d19, d20, d17 ; op0 |= t_op0 & ~(f2 & f & m) 6167bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d20, q15, #4 ; w_oq0 6177bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6187bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.i16 q15, q14 6197bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d2, d9 6207bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddw.u8 q15, d9 ; oq1 += q1 6217bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q4, d10, d15 6227bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddw.u8 q15, d15 ; oq1 += q7 6237bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d20, d21, d17 ; oq0 |= t_oq0 & ~(f2 & f & m) 6247bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d21, q15, #4 ; w_oq1 6257bc9febe8749e98a3812a0dc4380ceae75c29450Johann 
6267bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.i16 q15, q14 6277bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d3, d10 6287bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q15, q4 6297bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q4, d11, d15 6307bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d21, d22, d17 ; oq1 |= t_oq1 & ~(f2 & f & m) 6317bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d22, q15, #4 ; w_oq2 6327bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6337bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.i16 q15, q14 6347bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d4, d11 6357bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q15, q4 6367bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q4, d12, d15 6377bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d22, d23, d17 ; oq2 |= t_oq2 & ~(f2 & f & m) 6387bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d23, q15, #4 ; w_oq3 6397bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6407bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.i16 q15, q14 6417bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d5, d12 6427bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q15, q4 6437bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q4, d13, d15 6447bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d16, d1, d17 ; op6 |= p6 & ~(f2 & f & m) 6457bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d1, q15, #4 ; w_oq4 6467bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6477bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.i16 q15, q14 6487bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q14, d6, d13 6497bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q15, q4 6507bc9febe8749e98a3812a0dc4380ceae75c29450Johann vaddl.u8 q4, d14, d15 6517bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d24, d2, d17 ; op5 |= p5 & ~(f2 & f & m) 6527bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d2, q15, #4 ; w_oq5 
6537bc9febe8749e98a3812a0dc4380ceae75c29450Johann 6547bc9febe8749e98a3812a0dc4380ceae75c29450Johann vsub.i16 q15, q14 6557bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d25, d3, d17 ; op4 |= p4 & ~(f2 & f & m) 6567bc9febe8749e98a3812a0dc4380ceae75c29450Johann vadd.i16 q15, q4 6577bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d23, d11, d17 ; oq3 |= q3 & ~(f2 & f & m) 6587bc9febe8749e98a3812a0dc4380ceae75c29450Johann vqrshrn.u16 d3, q15, #4 ; w_oq6 6597bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d1, d12, d17 ; oq4 |= q4 & ~(f2 & f & m) 6607bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d2, d13, d17 ; oq5 |= q5 & ~(f2 & f & m) 6617bc9febe8749e98a3812a0dc4380ceae75c29450Johann vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m) 662b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 663b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian bx lr 6647bc9febe8749e98a3812a0dc4380ceae75c29450Johann ENDP ; |vpx_wide_mbfilter_neon| 665b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian 666b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian END 667