1b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian;
2b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian;
4b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian;  Use of this source code is governed by a BSD-style license
5b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian;  that can be found in the LICENSE file in the root of the source
6b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian;  tree. An additional intellectual property rights grant can be found
7b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian;  in the file PATENTS.  All contributing project authors may
8b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian;  be found in the AUTHORS file in the root of the source tree.
9b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian;
10b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
117bc9febe8749e98a3812a0dc4380ceae75c29450Johann    EXPORT  |vpx_lpf_horizontal_16_neon|
127bc9febe8749e98a3812a0dc4380ceae75c29450Johann    EXPORT  |vpx_lpf_horizontal_16_dual_neon|
137bc9febe8749e98a3812a0dc4380ceae75c29450Johann    EXPORT  |vpx_lpf_vertical_16_neon|
147bc9febe8749e98a3812a0dc4380ceae75c29450Johann    EXPORT  |vpx_lpf_vertical_16_dual_neon|
15b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    ARM
16b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
17b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    AREA ||.text||, CODE, READONLY, ALIGN=2
18b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
197bc9febe8749e98a3812a0dc4380ceae75c29450Johann; void mb_lpf_horizontal_edge(uint8_t *s, int p,
207bc9febe8749e98a3812a0dc4380ceae75c29450Johann;                             const uint8_t *blimit,
217bc9febe8749e98a3812a0dc4380ceae75c29450Johann;                             const uint8_t *limit,
227bc9febe8749e98a3812a0dc4380ceae75c29450Johann;                             const uint8_t *thresh,
237bc9febe8749e98a3812a0dc4380ceae75c29450Johann;                             int count)
24b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian; r0    uint8_t *s,
257bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r1    int p, /* pitch */
267bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r2    const uint8_t *blimit,
277bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r3    const uint8_t *limit,
287bc9febe8749e98a3812a0dc4380ceae75c29450Johann; sp    const uint8_t *thresh,
297bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r12   int count
;
; Shared body of the horizontal-edge wide loop filter. It is not listed in the
; EXPORT directives at the top of this file; the two public wrappers that
; follow it load r12 with the pass count and branch here.
;
; Each pass of the h_count loop handles 8 adjacent pixel columns: it loads the
; 16 rows around the edge (8 rows above s, 8 rows starting at s), calls
; vpx_wide_mbfilter_neon, and then stores only the rows selected by the status
; bits the helper leaves in r7.
;
; count must be at least 1: the counter is decremented and tested only after a
; pass has run, so a value of 0 would wrap around instead of exiting.
307bc9febe8749e98a3812a0dc4380ceae75c29450Johann|mb_lpf_horizontal_edge| PROC
317bc9febe8749e98a3812a0dc4380ceae75c29450Johann    push        {r4-r8, lr}
327bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vpush       {d8-d15}
    ; 6 core registers (24 bytes) plus 8 D registers (64 bytes) were just
    ; pushed, so the caller's first stack argument is now at sp + 88.
337bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ldr         r4, [sp, #88]              ; load thresh
347bc9febe8749e98a3812a0dc4380ceae75c29450Johann
357bc9febe8749e98a3812a0dc4380ceae75c29450Johannh_count
    ; Broadcast each threshold byte to all 8 lanes. This is redone on every
    ; pass because d16 and d18 are stored as filter outputs further down, so
    ; they no longer hold the parameters once the helper has run.
367bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d16[]}, [r2]              ; load *blimit
377bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d17[]}, [r3]              ; load *limit
387bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d18[]}, [r4]              ; load *thresh
397bc9febe8749e98a3812a0dc4380ceae75c29450Johann
    ; r8 = s - 8 * pitch, i.e. the row labelled p7 below.
407bc9febe8749e98a3812a0dc4380ceae75c29450Johann    sub         r8, r0, r1, lsl #3         ; move src pointer down by 8 lines
417bc9febe8749e98a3812a0dc4380ceae75c29450Johann
    ; One 8-byte row per load, r8 post-incremented by pitch. The @64 qualifier
    ; states that every row address is 64-bit aligned.
427bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d0}, [r8@64], r1          ; p7
437bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d1}, [r8@64], r1          ; p6
447bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d2}, [r8@64], r1          ; p5
457bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d3}, [r8@64], r1          ; p4
467bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d4}, [r8@64], r1          ; p3
477bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d5}, [r8@64], r1          ; p2
487bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d6}, [r8@64], r1          ; p1
497bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d7}, [r8@64], r1          ; p0
507bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d8}, [r8@64], r1          ; q0
517bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d9}, [r8@64], r1          ; q1
527bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d10}, [r8@64], r1         ; q2
537bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d11}, [r8@64], r1         ; q3
547bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d12}, [r8@64], r1         ; q4
557bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d13}, [r8@64], r1         ; q5
567bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d14}, [r8@64], r1         ; q6
577bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.u8     {d15}, [r8@64], r1         ; q7
587bc9febe8749e98a3812a0dc4380ceae75c29450Johann
597bc9febe8749e98a3812a0dc4380ceae75c29450Johann    bl          vpx_wide_mbfilter_neon
607bc9febe8749e98a3812a0dc4380ceae75c29450Johann
    ; r7 is the helper's status word. Bit 0 set: fall through to the 4-row
    ; store. Bit 0 clear: go on to test bit 1 at h_mbfilter.
617bc9febe8749e98a3812a0dc4380ceae75c29450Johann    tst         r7, #1
627bc9febe8749e98a3812a0dc4380ceae75c29450Johann    beq         h_mbfilter
637bc9febe8749e98a3812a0dc4380ceae75c29450Johann
647bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; flat && mask were not set for any of the channels. Just store the values
657bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; from filter.
    ; r8 = s - 2 * pitch (row p1); 4 rows p1..q1 are written.
667bc9febe8749e98a3812a0dc4380ceae75c29450Johann    sub         r8, r0, r1, lsl #1
677bc9febe8749e98a3812a0dc4380ceae75c29450Johann
687bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d25}, [r8@64], r1         ; store op1
697bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d24}, [r8@64], r1         ; store op0
707bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d23}, [r8@64], r1         ; store oq0
717bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d26}, [r8@64], r1         ; store oq1
727bc9febe8749e98a3812a0dc4380ceae75c29450Johann
737bc9febe8749e98a3812a0dc4380ceae75c29450Johann    b           h_next
747bc9febe8749e98a3812a0dc4380ceae75c29450Johann
757bc9febe8749e98a3812a0dc4380ceae75c29450Johannh_mbfilter
    ; Bit 1 set: fall through to the 6-row store. Bit 1 clear: take the
    ; 14-row store at h_wide_mbfilter.
767bc9febe8749e98a3812a0dc4380ceae75c29450Johann    tst         r7, #2
777bc9febe8749e98a3812a0dc4380ceae75c29450Johann    beq         h_wide_mbfilter
787bc9febe8749e98a3812a0dc4380ceae75c29450Johann
797bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; flat2 was not set for any of the channels. Just store the values from
807bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; mbfilter.
    ; r8 = s - 3 * pitch (row p2); 6 rows p2..q2 are written.
817bc9febe8749e98a3812a0dc4380ceae75c29450Johann    sub         r8, r0, r1, lsl #1
827bc9febe8749e98a3812a0dc4380ceae75c29450Johann    sub         r8, r8, r1
837bc9febe8749e98a3812a0dc4380ceae75c29450Johann
847bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d18}, [r8@64], r1         ; store op2
857bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d19}, [r8@64], r1         ; store op1
867bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d20}, [r8@64], r1         ; store op0
877bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d21}, [r8@64], r1         ; store oq0
887bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d22}, [r8@64], r1         ; store oq1
897bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d23}, [r8@64], r1         ; store oq2
907bc9febe8749e98a3812a0dc4380ceae75c29450Johann
917bc9febe8749e98a3812a0dc4380ceae75c29450Johann    b           h_next
927bc9febe8749e98a3812a0dc4380ceae75c29450Johann
937bc9febe8749e98a3812a0dc4380ceae75c29450Johannh_wide_mbfilter
    ; r8 = s - 7 * pitch (row p6); 14 rows p6..q6 are written. The outermost
    ; rows p7 and q7 are never stored.
947bc9febe8749e98a3812a0dc4380ceae75c29450Johann    sub         r8, r0, r1, lsl #3
957bc9febe8749e98a3812a0dc4380ceae75c29450Johann    add         r8, r8, r1
967bc9febe8749e98a3812a0dc4380ceae75c29450Johann
977bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d16}, [r8@64], r1         ; store op6
987bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d24}, [r8@64], r1         ; store op5
997bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d25}, [r8@64], r1         ; store op4
1007bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d26}, [r8@64], r1         ; store op3
1017bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d27}, [r8@64], r1         ; store op2
1027bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d18}, [r8@64], r1         ; store op1
1037bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d19}, [r8@64], r1         ; store op0
1047bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d20}, [r8@64], r1         ; store oq0
1057bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d21}, [r8@64], r1         ; store oq1
1067bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d22}, [r8@64], r1         ; store oq2
1077bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d23}, [r8@64], r1         ; store oq3
1087bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d1}, [r8@64], r1          ; store oq4
1097bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d2}, [r8@64], r1          ; store oq5
1107bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.u8     {d3}, [r8@64], r1          ; store oq6
1117bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1127bc9febe8749e98a3812a0dc4380ceae75c29450Johannh_next
    ; Step s right by 8 pixels for the next pass and count the pass down.
1137bc9febe8749e98a3812a0dc4380ceae75c29450Johann    add         r0, r0, #8
1147bc9febe8749e98a3812a0dc4380ceae75c29450Johann    subs        r12, r12, #1
1157bc9febe8749e98a3812a0dc4380ceae75c29450Johann    bne         h_count
1167bc9febe8749e98a3812a0dc4380ceae75c29450Johann
    ; Restore the saved registers; popping the saved lr into pc returns to
    ; the caller.
1177bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vpop        {d8-d15}
1187bc9febe8749e98a3812a0dc4380ceae75c29450Johann    pop         {r4-r8, pc}
1197bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1207bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ENDP        ; |mb_lpf_horizontal_edge|
1217bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1227bc9febe8749e98a3812a0dc4380ceae75c29450Johann; void vpx_lpf_horizontal_16_neon(uint8_t *s, int pitch,
1237bc9febe8749e98a3812a0dc4380ceae75c29450Johann;                                     const uint8_t *blimit,
1247bc9febe8749e98a3812a0dc4380ceae75c29450Johann;                                     const uint8_t *limit,
1257bc9febe8749e98a3812a0dc4380ceae75c29450Johann;                                     const uint8_t *thresh)
1267bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r0    uint8_t *s,
1277bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r1    int pitch,
1287bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r2    const uint8_t *blimit,
1297bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r3    const uint8_t *limit,
1307bc9febe8749e98a3812a0dc4380ceae75c29450Johann; sp    const uint8_t *thresh
;
; Exported entry point for a single pass (8 pixel columns) of the horizontal
; edge filter. It only sets the pass count in r12 and then branches with "b"
; rather than "bl", so lr still holds this function's return address and
; mb_lpf_horizontal_edge returns straight to our caller. r0-r3 and the stack
; argument are passed through untouched.
1317bc9febe8749e98a3812a0dc4380ceae75c29450Johann|vpx_lpf_horizontal_16_neon| PROC
1327bc9febe8749e98a3812a0dc4380ceae75c29450Johann    mov r12, #1
1337bc9febe8749e98a3812a0dc4380ceae75c29450Johann    b mb_lpf_horizontal_edge
1347bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ENDP        ; |vpx_lpf_horizontal_16_neon|
1357bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1367bc9febe8749e98a3812a0dc4380ceae75c29450Johann; void vpx_lpf_horizontal_16_dual_neon(uint8_t *s, int pitch,
1377bc9febe8749e98a3812a0dc4380ceae75c29450Johann;                                      const uint8_t *blimit,
1387bc9febe8749e98a3812a0dc4380ceae75c29450Johann;                                      const uint8_t *limit,
1397bc9febe8749e98a3812a0dc4380ceae75c29450Johann;                                      const uint8_t *thresh)
1407bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r0    uint8_t *s,
1417bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r1    int pitch,
1427bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r2    const uint8_t *blimit,
1437bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r3    const uint8_t *limit,
1447bc9febe8749e98a3812a0dc4380ceae75c29450Johann; sp    const uint8_t *thresh
;
; Exported "dual" entry point: identical to vpx_lpf_horizontal_16_neon except
; that it requests two passes. mb_lpf_horizontal_edge advances s by 8 pixels
; between passes, so 16 adjacent pixel columns are filtered in total, with the
; same blimit/limit/thresh bytes used for both passes. The tail branch ("b")
; lets the shared body return directly to our caller.
1457bc9febe8749e98a3812a0dc4380ceae75c29450Johann|vpx_lpf_horizontal_16_dual_neon| PROC
1467bc9febe8749e98a3812a0dc4380ceae75c29450Johann    mov r12, #2
1477bc9febe8749e98a3812a0dc4380ceae75c29450Johann    b mb_lpf_horizontal_edge
1487bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ENDP        ; |vpx_lpf_horizontal_16_dual_neon|
1497bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1507bc9febe8749e98a3812a0dc4380ceae75c29450Johann; void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
1517bc9febe8749e98a3812a0dc4380ceae75c29450Johann;                             const uint8_t *limit, const uint8_t *thresh,
1527bc9febe8749e98a3812a0dc4380ceae75c29450Johann;                             int count) {
1537bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r0    uint8_t *s,
1547bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r1    int p, /* pitch */
1557bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r2    const uint8_t *blimit,
1567bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r3    const uint8_t *limit,
1577bc9febe8749e98a3812a0dc4380ceae75c29450Johann; sp    const uint8_t *thresh,
1587bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r12   int count
;
; Shared body of the vertical-edge wide loop filter. It is not listed in the
; EXPORT directives at the top of this file; the two public wrappers that
; follow it load r12 with the pass count and branch here.
;
; Each pass of the v_count loop handles 8 pixel rows: it loads the 8 bytes on
; either side of the edge from every row, transposes them so that each D
; register holds one pixel column (the layout in the register map documented
; above vpx_wide_mbfilter_neon), calls the helper, and writes back only the
; columns selected by the status bits the helper leaves in r7. Every store
; path leaves r0 pointing 8 rows further down, ready for the next pass.
;
; count must be at least 1: the counter is decremented and tested only after a
; pass has run, so a value of 0 would wrap around instead of exiting.
1597bc9febe8749e98a3812a0dc4380ceae75c29450Johann|mb_lpf_vertical_edge_w| PROC
1607bc9febe8749e98a3812a0dc4380ceae75c29450Johann    push        {r4-r8, lr}
1617bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vpush       {d8-d15}
    ; 6 core registers (24 bytes) plus 8 D registers (64 bytes) were just
    ; pushed, so the caller's first stack argument is now at sp + 88.
1627bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ldr         r4, [sp, #88]              ; load thresh
1637bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1647bc9febe8749e98a3812a0dc4380ceae75c29450Johannv_count
    ; Broadcast each threshold byte to all 8 lanes. This is redone on every
    ; pass because d16 and d18 are reused for filter output further down.
1657bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d16[]}, [r2]              ; load *blimit
1667bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d17[]}, [r3]              ; load *limit
1677bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d18[]}, [r4]              ; load *thresh
1687bc9febe8749e98a3812a0dc4380ceae75c29450Johann
    ; r8 = s - 8: the 8 bytes to the left of the edge in the current row.
1697bc9febe8749e98a3812a0dc4380ceae75c29450Johann    sub         r8, r0, #8
1707bc9febe8749e98a3812a0dc4380ceae75c29450Johann
    ; For each of 8 rows: d0..d7 take the 8 bytes left of the edge (via r8),
    ; d8..d15 the 8 bytes from the edge onwards (via r0). Both pointers are
    ; post-incremented by pitch; @64 states the addresses are 64-bit aligned.
1717bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d0}, [r8@64], r1
1727bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d8}, [r0@64], r1
1737bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d1}, [r8@64], r1
1747bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d9}, [r0@64], r1
1757bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d2}, [r8@64], r1
1767bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d10}, [r0@64], r1
1777bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d3}, [r8@64], r1
1787bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d11}, [r0@64], r1
1797bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d4}, [r8@64], r1
1807bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d12}, [r0@64], r1
1817bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d5}, [r8@64], r1
1827bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d13}, [r0@64], r1
1837bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d6}, [r8@64], r1
1847bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d14}, [r0@64], r1
1857bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d7}, [r8@64], r1
1867bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vld1.8      {d15}, [r0@64], r1
1877bc9febe8749e98a3812a0dc4380ceae75c29450Johann
    ; Undo the 8 post-increments so r0 points at the first row again.
1887bc9febe8749e98a3812a0dc4380ceae75c29450Johann    sub         r0, r0, r1, lsl #3
1897bc9febe8749e98a3812a0dc4380ceae75c29450Johann
    ; Transpose the two 8x8 byte tiles (d0..d7 and d8..d15) in three steps of
    ; decreasing element size (32-, 16-, then 8-bit), turning rows into the
    ; per-column registers the helper expects.
1907bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.32     q0, q2
1917bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.32     q1, q3
1927bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.32     q4, q6
1937bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.32     q5, q7
1947bc9febe8749e98a3812a0dc4380ceae75c29450Johann
1957bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.16     q0, q1
1967bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.16     q2, q3
1977bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.16     q4, q5
1987bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.16     q6, q7
1997bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2007bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d0, d1
2017bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d2, d3
2027bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d4, d5
2037bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d6, d7
2047bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2057bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d8, d9
2067bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d10, d11
2077bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d12, d13
2087bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d14, d15
2097bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2107bc9febe8749e98a3812a0dc4380ceae75c29450Johann    bl          vpx_wide_mbfilter_neon
2117bc9febe8749e98a3812a0dc4380ceae75c29450Johann
    ; r7 is the helper's status word. Bit 0 set: fall through to the
    ; 4-column store. Bit 0 clear: go on to test bit 1 at v_mbfilter.
2127bc9febe8749e98a3812a0dc4380ceae75c29450Johann    tst         r7, #1
2137bc9febe8749e98a3812a0dc4380ceae75c29450Johann    beq         v_mbfilter
2147bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2157bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; flat && mask were not set for any of the channels. Just store the values
2167bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; from filter.
    ; Point r0 two bytes left of the edge; each row receives 4 bytes.
2177bc9febe8749e98a3812a0dc4380ceae75c29450Johann    sub         r0, #2
2187bc9febe8749e98a3812a0dc4380ceae75c29450Johann
    ; The horizontal variant stores these results in the order d25, d24, d23,
    ; d26 (op1, op0, oq0, oq1). Swapping d23 and d25 puts them in ascending
    ; register order so that one vst4 lane store writes a row in memory order.
2197bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vswp        d23, d25
2207bc9febe8749e98a3812a0dc4380ceae75c29450Johann
    ; Lane i of the four registers goes to row i; r0 advances by pitch.
2217bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst4.8      {d23[0], d24[0], d25[0], d26[0]}, [r0], r1
2227bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst4.8      {d23[1], d24[1], d25[1], d26[1]}, [r0], r1
2237bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst4.8      {d23[2], d24[2], d25[2], d26[2]}, [r0], r1
2247bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst4.8      {d23[3], d24[3], d25[3], d26[3]}, [r0], r1
2257bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst4.8      {d23[4], d24[4], d25[4], d26[4]}, [r0], r1
2267bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst4.8      {d23[5], d24[5], d25[5], d26[5]}, [r0], r1
2277bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst4.8      {d23[6], d24[6], d25[6], d26[6]}, [r0], r1
2287bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst4.8      {d23[7], d24[7], d25[7], d26[7]}, [r0], r1
    ; Undo the -2 column offset; r0 is now at the edge, 8 rows further down.
2297bc9febe8749e98a3812a0dc4380ceae75c29450Johann    add         r0, #2
2307bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2317bc9febe8749e98a3812a0dc4380ceae75c29450Johann    b           v_next
2327bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2337bc9febe8749e98a3812a0dc4380ceae75c29450Johannv_mbfilter
    ; Bit 1 set: fall through to the 6-column store. Bit 1 clear: take the
    ; full-width store at v_wide_mbfilter.
2347bc9febe8749e98a3812a0dc4380ceae75c29450Johann    tst         r7, #2
2357bc9febe8749e98a3812a0dc4380ceae75c29450Johann    beq         v_wide_mbfilter
2367bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2377bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; flat2 was not set for any of the channels. Just store the values from
2387bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; mbfilter.
    ; r8 = s - 3. Each row gets 3 bytes left of the edge from d18..d20 (via
    ; r8) and 3 bytes from the edge onwards from d21..d23 (via r0). r0 itself
    ; is post-incremented, so it ends up 8 rows further down.
2397bc9febe8749e98a3812a0dc4380ceae75c29450Johann    sub         r8, r0, #3
2407bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2417bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d18[0], d19[0], d20[0]}, [r8], r1
2427bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d21[0], d22[0], d23[0]}, [r0], r1
2437bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d18[1], d19[1], d20[1]}, [r8], r1
2447bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d21[1], d22[1], d23[1]}, [r0], r1
2457bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d18[2], d19[2], d20[2]}, [r8], r1
2467bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d21[2], d22[2], d23[2]}, [r0], r1
2477bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d18[3], d19[3], d20[3]}, [r8], r1
2487bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d21[3], d22[3], d23[3]}, [r0], r1
2497bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d18[4], d19[4], d20[4]}, [r8], r1
2507bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d21[4], d22[4], d23[4]}, [r0], r1
2517bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d18[5], d19[5], d20[5]}, [r8], r1
2527bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d21[5], d22[5], d23[5]}, [r0], r1
2537bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d18[6], d19[6], d20[6]}, [r8], r1
2547bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d21[6], d22[6], d23[6]}, [r0], r1
2557bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d18[7], d19[7], d20[7]}, [r8], r1
2567bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst3.8      {d21[7], d22[7], d23[7]}, [r0], r1
2577bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2587bc9febe8749e98a3812a0dc4380ceae75c29450Johann    b           v_next
2597bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2607bc9febe8749e98a3812a0dc4380ceae75c29450Johannv_wide_mbfilter
    ; Full-width result: transpose both 8x8 tiles back into row order and
    ; rewrite all 16 bytes of every row. r8 = s - 8 is the left tile.
2617bc9febe8749e98a3812a0dc4380ceae75c29450Johann    sub         r8, r0, #8
2627bc9febe8749e98a3812a0dc4380ceae75c29450Johann
    ; Left tile, columns in order: d0, d16, d24, d25, d26, d27, d18, d19.
    ; NOTE(review): d0 is presumably still the p7 column as loaded, i.e. the
    ; helper is assumed not to modify it -- confirm against the helper body.
2637bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.32     d0,  d26
2647bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.32     d16, d27
2657bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.32     d24, d18
2667bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.32     d25, d19
2677bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2687bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.16     d0,  d24
2697bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.16     d16, d25
2707bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.16     d26, d18
2717bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.16     d27, d19
2727bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2737bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d0,  d16
2747bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d24, d25
2757bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d26, d27
2767bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d18, d19
2777bc9febe8749e98a3812a0dc4380ceae75c29450Johann
    ; Right tile, columns in order: d20, d21, d22, d23, d1, d2, d3, d15.
    ; NOTE(review): d15 is presumably still the q7 column as loaded, i.e. the
    ; helper is assumed not to modify it -- confirm against the helper body.
2787bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.32     d20, d1
2797bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.32     d21, d2
2807bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.32     d22, d3
2817bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.32     d23, d15
2827bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2837bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.16     d20, d22
2847bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.16     d21, d23
2857bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.16     d1,  d3
2867bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.16     d2,  d15
2877bc9febe8749e98a3812a0dc4380ceae75c29450Johann
2887bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d20, d21
2897bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d22, d23
2907bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d1,  d2
2917bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vtrn.8      d3,  d15
2927bc9febe8749e98a3812a0dc4380ceae75c29450Johann
    ; One row per pair of stores: left 8 bytes via r8, right 8 bytes via r0.
    ; r0 is post-incremented, so it ends up 8 rows further down.
2937bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d0}, [r8@64], r1
2947bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d20}, [r0@64], r1
2957bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d16}, [r8@64], r1
2967bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d21}, [r0@64], r1
2977bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d24}, [r8@64], r1
2987bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d22}, [r0@64], r1
2997bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d25}, [r8@64], r1
3007bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d23}, [r0@64], r1
3017bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d26}, [r8@64], r1
3027bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d1}, [r0@64], r1
3037bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d27}, [r8@64], r1
3047bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d2}, [r0@64], r1
3057bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d18}, [r8@64], r1
3067bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d3}, [r0@64], r1
3077bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d19}, [r8@64], r1
3087bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vst1.8      {d15}, [r0@64], r1
3097bc9febe8749e98a3812a0dc4380ceae75c29450Johann
3107bc9febe8749e98a3812a0dc4380ceae75c29450Johannv_next
    ; r0 already points at the next 8 rows; just count the pass down.
3117bc9febe8749e98a3812a0dc4380ceae75c29450Johann    subs        r12, #1
3127bc9febe8749e98a3812a0dc4380ceae75c29450Johann    bne         v_count
3137bc9febe8749e98a3812a0dc4380ceae75c29450Johann
    ; Restore the saved registers; popping the saved lr into pc returns to
    ; the caller.
3147bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vpop        {d8-d15}
3157bc9febe8749e98a3812a0dc4380ceae75c29450Johann    pop         {r4-r8, pc}
3167bc9febe8749e98a3812a0dc4380ceae75c29450Johann
3177bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ENDP        ; |mb_lpf_vertical_edge_w|
3187bc9febe8749e98a3812a0dc4380ceae75c29450Johann
3197bc9febe8749e98a3812a0dc4380ceae75c29450Johann; void vpx_lpf_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
3207bc9febe8749e98a3812a0dc4380ceae75c29450Johann;                               const uint8_t *limit, const uint8_t *thresh)
3217bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r0    uint8_t *s,
3227bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r1    int p, /* pitch */
3237bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r2    const uint8_t *blimit,
3247bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r3    const uint8_t *limit,
3257bc9febe8749e98a3812a0dc4380ceae75c29450Johann; sp    const uint8_t *thresh
;
; Exported entry point for a single pass (8 pixel rows) of the vertical edge
; filter. It only sets the pass count in r12 and then branches with "b" rather
; than "bl", so lr still holds this function's return address and
; mb_lpf_vertical_edge_w returns straight to our caller. r0-r3 and the stack
; argument are passed through untouched.
3267bc9febe8749e98a3812a0dc4380ceae75c29450Johann|vpx_lpf_vertical_16_neon| PROC
3277bc9febe8749e98a3812a0dc4380ceae75c29450Johann    mov r12, #1
3287bc9febe8749e98a3812a0dc4380ceae75c29450Johann    b mb_lpf_vertical_edge_w
3297bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ENDP        ; |vpx_lpf_vertical_16_neon|
3307bc9febe8749e98a3812a0dc4380ceae75c29450Johann
3317bc9febe8749e98a3812a0dc4380ceae75c29450Johann; void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
3327bc9febe8749e98a3812a0dc4380ceae75c29450Johann;                                    const uint8_t *limit,
3337bc9febe8749e98a3812a0dc4380ceae75c29450Johann;                                    const uint8_t *thresh)
3347bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r0    uint8_t *s,
3357bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r1    int p, /* pitch */
3367bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r2    const uint8_t *blimit,
3377bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r3    const uint8_t *limit,
3387bc9febe8749e98a3812a0dc4380ceae75c29450Johann; sp    const uint8_t *thresh
;
; Exported "dual" entry point: identical to vpx_lpf_vertical_16_neon except
; that it requests two passes. Each pass of mb_lpf_vertical_edge_w leaves s
; 8 rows further down, so 16 consecutive pixel rows are filtered in total,
; with the same blimit/limit/thresh bytes used for both passes. The tail
; branch ("b") lets the shared body return directly to our caller.
3397bc9febe8749e98a3812a0dc4380ceae75c29450Johann|vpx_lpf_vertical_16_dual_neon| PROC
3407bc9febe8749e98a3812a0dc4380ceae75c29450Johann    mov r12, #2
3417bc9febe8749e98a3812a0dc4380ceae75c29450Johann    b mb_lpf_vertical_edge_w
3427bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ENDP        ; |vpx_lpf_vertical_16_dual_neon|
3437bc9febe8749e98a3812a0dc4380ceae75c29450Johann
3447bc9febe8749e98a3812a0dc4380ceae75c29450Johann; void vpx_wide_mbfilter_neon();
345b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian; This is a helper function for the loopfilters. The individual functions do the
3467bc9febe8749e98a3812a0dc4380ceae75c29450Johann; necessary load, transpose (if necessary) and store.
347b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian;
3487bc9febe8749e98a3812a0dc4380ceae75c29450Johann; r0-r3 PRESERVE
3497bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d16    blimit
3507bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d17    limit
3517bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d18    thresh
3527bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d0    p7
3537bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d1    p6
3547bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d2    p5
3557bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d3    p4
3567bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d4    p3
3577bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d5    p2
3587bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d6    p1
3597bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d7    p0
3607bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d8    q0
3617bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d9    q1
3627bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d10   q2
3637bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d11   q3
3647bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d12   q4
3657bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d13   q5
3667bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d14   q6
3677bc9febe8749e98a3812a0dc4380ceae75c29450Johann; d15   q7
3687bc9febe8749e98a3812a0dc4380ceae75c29450Johann|vpx_wide_mbfilter_neon| PROC
3697bc9febe8749e98a3812a0dc4380ceae75c29450Johann    mov         r7, #0
370b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
371b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    ; filter_mask
3727bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d19, d4, d5                ; abs(p3 - p2)
3737bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d20, d5, d6                ; abs(p2 - p1)
3747bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d21, d6, d7                ; abs(p1 - p0)
3757bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d22, d9, d8                ; abs(q1 - q0)
3767bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d23, d10, d9               ; abs(q2 - q1)
3777bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d24, d11, d10              ; abs(q3 - q2)
378b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
379b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    ; only compare the largest value to limit
3807bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d19, d19, d20              ; max(abs(p3 - p2), abs(p2 - p1))
3817bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d20, d21, d22              ; max(abs(p1 - p0), abs(q1 - q0))
3827bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d23, d23, d24              ; max(abs(q2 - q1), abs(q3 - q2))
3837bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d19, d19, d20
384b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
3857bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d24, d7, d8                ; abs(p0 - q0)
386b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
3877bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d19, d19, d23
388b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
3897bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d23, d6, d9                ; a = abs(p1 - q1)
3907bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqadd.u8    d24, d24, d24              ; b = abs(p0 - q0) * 2
391b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
3927bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; abs () > limit
3937bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vcge.u8     d19, d17, d19
394b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
3957bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; flatmask4
3967bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d25, d7, d5                ; abs(p0 - p2)
3977bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d26, d8, d10               ; abs(q0 - q2)
3987bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d27, d4, d7                ; abs(p3 - p0)
3997bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d28, d11, d8               ; abs(q3 - q0)
400b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4017bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; only compare the largest value to thresh
4027bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d25, d25, d26              ; max(abs(p0 - p2), abs(q0 - q2))
4037bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d26, d27, d28              ; max(abs(p3 - p0), abs(q3 - q0))
4047bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d25, d25, d26
4057bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d20, d20, d25
406b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4077bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vshr.u8     d23, d23, #1               ; a = a / 2
4087bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqadd.u8    d24, d24, d23              ; a = b + a
409b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4107bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmov.u8     d30, #1
4117bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vcge.u8     d24, d16, d24              ; (a > blimit * 2 + limit) * -1
412b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4137bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vcge.u8     d20, d30, d20              ; flat
414b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4157bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vand        d19, d19, d24              ; mask
416b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4177bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; hevmask
4187bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vcgt.u8     d21, d21, d18              ; (abs(p1 - p0) > thresh)*-1
4197bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vcgt.u8     d22, d22, d18              ; (abs(q1 - q0) > thresh)*-1
4207bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vorr        d21, d21, d22              ; hev
421b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4227bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vand        d16, d20, d19              ; flat && mask
4237bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmov        r5, r6, d16
424b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4257bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7)
4267bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d22, d3, d7                ; abs(p4 - p0)
4277bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d23, d12, d8               ; abs(q4 - q0)
4287bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d24, d7, d2                ; abs(p0 - p5)
4297bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d25, d8, d13               ; abs(q0 - q5)
4307bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d26, d1, d7                ; abs(p6 - p0)
4317bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d27, d14, d8               ; abs(q6 - q0)
4327bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d28, d0, d7                ; abs(p7 - p0)
4337bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vabd.u8     d29, d15, d8               ; abs(q7 - q0)
434b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4357bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; only compare the largest value to thresh
4367bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d22, d22, d23              ; max(abs(p4 - p0), abs(q4 - q0))
4377bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d23, d24, d25              ; max(abs(p0 - p5), abs(q0 - q5))
4387bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d24, d26, d27              ; max(abs(p6 - p0), abs(q6 - q0))
4397bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d25, d28, d29              ; max(abs(p7 - p0), abs(q7 - q0))
440b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4417bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d26, d22, d23
4427bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d27, d24, d25
4437bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmax.u8     d23, d26, d27
444b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4457bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vcge.u8     d18, d30, d23              ; flat2
446b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4477bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmov.u8     d22, #0x80
448b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4497bc9febe8749e98a3812a0dc4380ceae75c29450Johann    orrs        r5, r5, r6                 ; Check for 0
4507bc9febe8749e98a3812a0dc4380ceae75c29450Johann    orreq       r7, r7, #1                 ; Only do filter branch
451b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4527bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vand        d17, d18, d16              ; flat2 && flat && mask
4537bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmov        r5, r6, d17
454b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4557bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; mbfilter() function
456b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4577bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; filter() function
4587bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; convert to signed
4597bc9febe8749e98a3812a0dc4380ceae75c29450Johann    veor        d23, d8, d22               ; qs0
4607bc9febe8749e98a3812a0dc4380ceae75c29450Johann    veor        d24, d7, d22               ; ps0
4617bc9febe8749e98a3812a0dc4380ceae75c29450Johann    veor        d25, d6, d22               ; ps1
4627bc9febe8749e98a3812a0dc4380ceae75c29450Johann    veor        d26, d9, d22               ; qs1
463b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4647bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmov.u8     d27, #3
465b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4667bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.s8     d28, d23, d24              ; ( qs0 - ps0)
4677bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqsub.s8    d29, d25, d26              ; filter = clamp(ps1-qs1)
4687bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmull.s8    q15, d28, d27              ; 3 * ( qs0 - ps0)
4697bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vand        d29, d29, d21              ; filter &= hev
4707bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddw.s8    q15, q15, d29              ; filter + 3 * (qs0 - ps0)
4717bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmov.u8     d29, #4
472b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4737bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; filter = clamp(filter + 3 * ( qs0 - ps0))
4747bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqmovn.s16  d28, q15
4757bc9febe8749e98a3812a0dc4380ceae75c29450Johann
4767bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vand        d28, d28, d19              ; filter &= mask
4777bc9febe8749e98a3812a0dc4380ceae75c29450Johann
4787bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqadd.s8    d30, d28, d27              ; filter2 = clamp(filter+3)
4797bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqadd.s8    d29, d28, d29              ; filter1 = clamp(filter+4)
4807bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vshr.s8     d30, d30, #3               ; filter2 >>= 3
4817bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vshr.s8     d29, d29, #3               ; filter1 >>= 3
4827bc9febe8749e98a3812a0dc4380ceae75c29450Johann
4837bc9febe8749e98a3812a0dc4380ceae75c29450Johann
4847bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqadd.s8    d24, d24, d30              ; op0 = clamp(ps0 + filter2)
4857bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqsub.s8    d23, d23, d29              ; oq0 = clamp(qs0 - filter1)
4867bc9febe8749e98a3812a0dc4380ceae75c29450Johann
4877bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; outer tap adjustments: ++filter1 >> 1
4887bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vrshr.s8    d29, d29, #1
4897bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbic        d29, d29, d21              ; filter &= ~hev
4907bc9febe8749e98a3812a0dc4380ceae75c29450Johann
4917bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqadd.s8    d25, d25, d29              ; op1 = clamp(ps1 + filter)
4927bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqsub.s8    d26, d26, d29              ; oq1 = clamp(qs1 - filter)
4937bc9febe8749e98a3812a0dc4380ceae75c29450Johann
4947bc9febe8749e98a3812a0dc4380ceae75c29450Johann    veor        d24, d24, d22              ; *f_op0 = u^0x80
4957bc9febe8749e98a3812a0dc4380ceae75c29450Johann    veor        d23, d23, d22              ; *f_oq0 = u^0x80
4967bc9febe8749e98a3812a0dc4380ceae75c29450Johann    veor        d25, d25, d22              ; *f_op1 = u^0x80
4977bc9febe8749e98a3812a0dc4380ceae75c29450Johann    veor        d26, d26, d22              ; *f_oq1 = u^0x80
4987bc9febe8749e98a3812a0dc4380ceae75c29450Johann
4997bc9febe8749e98a3812a0dc4380ceae75c29450Johann    tst         r7, #1
5007bc9febe8749e98a3812a0dc4380ceae75c29450Johann    bxne        lr
5017bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5027bc9febe8749e98a3812a0dc4380ceae75c29450Johann    orrs        r5, r5, r6                 ; Check for 0
5037bc9febe8749e98a3812a0dc4380ceae75c29450Johann    orreq       r7, r7, #2                 ; Only do mbfilter branch
5047bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5057bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; mbfilter flat && mask branch
5067bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; TODO(fgalligan): Can I decrease the cycles shifting to consecutive d's
5077bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; and using vbit on the q's?
5087bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmov.u8     d29, #2
5097bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q15, d7, d8                ; op2 = p0 + q0
5107bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmlal.u8    q15, d4, d27               ; op2 = p0 + q0 + p3 * 3
5117bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmlal.u8    q15, d5, d29               ; op2 = p0 + q0 + p3 * 3 + p2 * 2
5127bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q10, d4, d5
5137bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddw.u8    q15, d6                    ; op2=p1 + p0 + q0 + p3 * 3 + p2 *2
5147bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d6, d9
5157bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d18, q15, #3               ; r_op2
5167bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5177bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.i16    q15, q10
5187bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q10, d4, d6
5197bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q15, q14
5207bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d7, d10
5217bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d19, q15, #3               ; r_op1
5227bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5237bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.i16    q15, q10
5247bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q15, q14
5257bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d8, d11
5267bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d20, q15, #3               ; r_op0
5277bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5287bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsubw.u8    q15, d4                    ; oq0 = op0 - p3
5297bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsubw.u8    q15, d7                    ; oq0 -= p0
5307bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q15, q14
5317bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d9, d11
5327bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d21, q15, #3               ; r_oq0
5337bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5347bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsubw.u8    q15, d5                    ; oq1 = oq0 - p2
5357bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsubw.u8    q15, d8                    ; oq1 -= q0
5367bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q15, q14
5377bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d10, d11
5387bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d22, q15, #3               ; r_oq1
5397bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5407bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsubw.u8    q15, d6                    ; oq2 = oq1 - p1
5417bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsubw.u8    q15, d9                    ; oq2 -= q1
5427bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q15, q14
5437bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d27, q15, #3               ; r_oq2
5447bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5457bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; Filter does not set op2 or oq2, so use p2 and q2.
5467bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d18, d5, d16               ; t_op2 |= p2 & ~(flat & mask)
5477bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d19, d25, d16              ; t_op1 |= f_op1 & ~(flat & mask)
5487bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d20, d24, d16              ; t_op0 |= f_op0 & ~(flat & mask)
5497bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d21, d23, d16              ; t_oq0 |= f_oq0 & ~(flat & mask)
5507bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d22, d26, d16              ; t_oq1 |= f_oq1 & ~(flat & mask)
5517bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5527bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbit        d23, d27, d16              ; t_oq2 |= r_oq2 & (flat & mask)
5537bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d23, d10, d16              ; t_oq2 |= q2 & ~(flat & mask)
5547bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5557bc9febe8749e98a3812a0dc4380ceae75c29450Johann    tst         r7, #2
5567bc9febe8749e98a3812a0dc4380ceae75c29450Johann    bxne        lr
5577bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5587bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ; wide_mbfilter flat2 && flat && mask branch
5597bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmov.u8     d16, #7
5607bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q15, d7, d8                ; op6 = p0 + q0
5617bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q12, d2, d3
5627bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q13, d4, d5
5637bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d1, d6
5647bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vmlal.u8    q15, d0, d16               ; op6 += p7 * 7
5657bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q12, q13
5667bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q15, q14
5677bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d2, d9
5687bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q15, q12
5697bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q12, d0, d1
5707bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddw.u8    q15, d1
5717bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q13, d0, d2
5727bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q14, q15, q14
5737bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d16, q15, #4               ; w_op6
5747bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5757bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.i16    q15, q14, q12
5767bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d3, d10
5777bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d24, q15, #4               ; w_op5
5787bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5797bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.i16    q15, q13
5807bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q13, d0, d3
5817bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q15, q14
5827bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d4, d11
5837bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d25, q15, #4               ; w_op4
5847bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5857bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q15, q14
5867bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d0, d4
5877bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.i16    q15, q13
5887bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.i16    q14, q15, q14
5897bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d26, q15, #4               ; w_op3
5907bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5917bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddw.u8    q15, q14, d5               ; op2 += p2
5927bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d0, d5
5937bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddw.u8    q15, d12                   ; op2 += q4
5947bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d26, d4, d17               ; op3 |= p3 & ~(f2 & f & m)
5957bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d27, q15, #4               ; w_op2
5967bc9febe8749e98a3812a0dc4380ceae75c29450Johann
5977bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.i16    q15, q14
5987bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d0, d6
5997bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddw.u8    q15, d6                    ; op1 += p1
6007bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddw.u8    q15, d13                   ; op1 += q5
6017bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d27, d18, d17              ; op2 |= t_op2 & ~(f2 & f & m)
6027bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d18, q15, #4               ; w_op1
6037bc9febe8749e98a3812a0dc4380ceae75c29450Johann
6047bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.i16    q15, q14
6057bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d0, d7
6067bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddw.u8    q15, d7                    ; op0 += p0
6077bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddw.u8    q15, d14                   ; op0 += q6
6087bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d18, d19, d17              ; op1 |= t_op1 & ~(f2 & f & m)
6097bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d19, q15, #4               ; w_op0
6107bc9febe8749e98a3812a0dc4380ceae75c29450Johann
6117bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.i16    q15, q14
6127bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d1, d8
6137bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddw.u8    q15, d8                    ; oq0 += q0
6147bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddw.u8    q15, d15                   ; oq0 += q7
6157bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d19, d20, d17              ; op0 |= t_op0 & ~(f2 & f & m)
6167bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d20, q15, #4               ; w_oq0
6177bc9febe8749e98a3812a0dc4380ceae75c29450Johann
6187bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.i16    q15, q14
6197bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d2, d9
6207bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddw.u8    q15, d9                    ; oq1 += q1
6217bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q4, d10, d15
6227bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddw.u8    q15, d15                   ; oq1 += q7
6237bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d20, d21, d17              ; oq0 |= t_oq0 & ~(f2 & f & m)
6247bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d21, q15, #4               ; w_oq1
6257bc9febe8749e98a3812a0dc4380ceae75c29450Johann
6267bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.i16    q15, q14
6277bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d3, d10
6287bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q15, q4
6297bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q4, d11, d15
6307bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d21, d22, d17              ; oq1 |= t_oq1 & ~(f2 & f & m)
6317bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d22, q15, #4               ; w_oq2
6327bc9febe8749e98a3812a0dc4380ceae75c29450Johann
6337bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.i16    q15, q14
6347bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d4, d11
6357bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q15, q4
6367bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q4, d12, d15
6377bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d22, d23, d17              ; oq2 |= t_oq2 & ~(f2 & f & m)
6387bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d23, q15, #4               ; w_oq3
6397bc9febe8749e98a3812a0dc4380ceae75c29450Johann
6407bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.i16    q15, q14
6417bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d5, d12
6427bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q15, q4
6437bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q4, d13, d15
6447bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d16, d1, d17               ; op6 |= p6 & ~(f2 & f & m)
6457bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d1, q15, #4                ; w_oq4
6467bc9febe8749e98a3812a0dc4380ceae75c29450Johann
6477bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.i16    q15, q14
6487bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q14, d6, d13
6497bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q15, q4
6507bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vaddl.u8    q4, d14, d15
6517bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d24, d2, d17               ; op5 |= p5 & ~(f2 & f & m)
6527bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d2, q15, #4                ; w_oq5
6537bc9febe8749e98a3812a0dc4380ceae75c29450Johann
6547bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vsub.i16    q15, q14
6557bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d25, d3, d17               ; op4 |= p4 & ~(f2 & f & m)
6567bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vadd.i16    q15, q4
6577bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d23, d11, d17              ; oq3 |= q3 & ~(f2 & f & m)
6587bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vqrshrn.u16 d3, q15, #4                ; w_oq6
6597bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d1, d12, d17               ; oq4 |= q4 & ~(f2 & f & m)
6607bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d2, d13, d17               ; oq5 |= q5 & ~(f2 & f & m)
6617bc9febe8749e98a3812a0dc4380ceae75c29450Johann    vbif        d3, d14, d17               ; oq6 |= q6 & ~(f2 & f & m)
662b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
663b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    bx          lr
6647bc9febe8749e98a3812a0dc4380ceae75c29450Johann    ENDP        ; |vpx_wide_mbfilter_neon|
665b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
666b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    END
667