1;
2;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license
5;  that can be found in the LICENSE file in the root of the source
6;  tree. An additional intellectual property rights grant can be found
7;  in the file PATENTS.  All contributing project authors may
8;  be found in the AUTHORS file in the root of the source tree.
9;
10
11    EXPORT  |vpx_convolve_copy_neon|
12    ARM
13    REQUIRE8
14    PRESERVE8
15
16    AREA ||.text||, CODE, READONLY, ALIGN=2
17
;-----------------------------------------------------------------------
; vpx_convolve_copy_neon
;
; Row-by-row block copy; no filtering is done in this routine.
;
; In:    r0        = source pointer (post-incremented as rows are read)
;        r1        = source stride in bytes
;        r2        = destination pointer (post-incremented as rows are
;                    written)
;        r3        = destination stride in bytes
;        [sp, #16] = width  on entry (read at [sp, #28] after the push)
;        [sp, #20] = height on entry (read at [sp, #32] after the push)
;        The four stack words below the width are never read.
;
; Width picks the bytes copied per row and is not validated:
;        above 32 -> 64, exactly 32 -> 32, 9 to 31 -> 16,
;        exactly 8 -> 8, anything else -> 4.
;
; Height: every path tests the counter after copying, so one pass always
;        runs. The 8, 16 and 32 byte paths copy two rows per pass, so an
;        odd height copies one row more than requested.
;
; Alignment: the neon stores carry @128 (16/32/64 byte paths) or @64
;        (8 byte path) qualifiers, so each destination row address must
;        be aligned to 16 or 8 bytes respectively. Loads carry no
;        qualifier.
;
; Clobb: r0, r2, flags always; r3 and lr in the 64 byte path; r12 in the
;        4 byte path; q0-q3 in the neon paths. r4 and r5 are saved on
;        entry and restored on return.
;-----------------------------------------------------------------------
|vpx_convolve_copy_neon| PROC
    push                {r4-r5, lr}
    ldrd                r4, r5, [sp, #28]       ; r4 = width, r5 = height

    ; Two compares split the width five ways.
    cmp                 r4, #32
    bgt                 rows64_setup
    beq                 rows32_pair
    cmp                 r4, #8
    bgt                 rows16_pair
    beq                 rows8_pair
    ; Width below 8 falls through to the 4 byte path.

    ; ---- 4 bytes per row, one row per pass (core registers only) ----
rows4_next
    ldr                 r12, [r0], r1           ; word load, then src += stride
    subs                r5, r5, #1              ; str below leaves flags alone
    str                 r12, [r2], r3           ; word store, then dst += stride
    bgt                 rows4_next
    pop                 {r4-r5, pc}

    ; ---- 8 bytes per row, two rows per pass ----
rows8_pair
    pld                 [r0, r1, lsl #1]        ; hint: two strides ahead
    vld1.8              {d0}, [r0], r1
    pld                 [r0, r1, lsl #1]
    vld1.8              {d2}, [r0], r1
    subs                r5, r5, #2              ; vst1 does not touch flags
    vst1.8              {d0}, [r2@64], r3
    vst1.8              {d2}, [r2@64], r3
    bgt                 rows8_pair
    pop                 {r4-r5, pc}

    ; ---- 16 bytes per row, two rows per pass ----
rows16_pair
    pld                 [r0, r1, lsl #1]
    vld1.8              {q0}, [r0], r1
    pld                 [r0, r1, lsl #1]
    vld1.8              {q1}, [r0], r1
    subs                r5, r5, #2
    vst1.8              {q0}, [r2@128], r3
    vst1.8              {q1}, [r2@128], r3
    bgt                 rows16_pair
    pop                 {r4-r5, pc}

    ; ---- 32 bytes per row, two rows per pass ----
rows32_pair
    pld                 [r0, r1, lsl #1]
    vld1.8              {q0-q1}, [r0], r1
    pld                 [r0, r1, lsl #1]
    vld1.8              {q2-q3}, [r0], r1
    subs                r5, r5, #2
    vst1.8              {q0-q1}, [r2@128], r3
    vst1.8              {q2-q3}, [r2@128], r3
    bgt                 rows32_pair
    pop                 {r4-r5, pc}

    ; ---- 64 bytes per row, one row per pass ----
    ; Each row moves as two 32 byte halves. The first half advances the
    ; pointer by 32 through writeback, so the second half steps by
    ; (stride - 32) to land on the start of the next row.
rows64_setup
    sub                 r3, r3, #32             ; dst step after second half
    sub                 lr, r1, #32             ; src step after second half
rows64_next
    pld                 [r0, r1, lsl #1]
    vld1.8              {q0-q1}, [r0]!          ; bytes 0-31,  src += 32
    vld1.8              {q2-q3}, [r0], lr       ; bytes 32-63, src -> next row
    subs                r5, r5, #1
    vst1.8              {q0-q1}, [r2@128]!      ; bytes 0-31,  dst += 32
    vst1.8              {q2-q3}, [r2@128], r3   ; bytes 32-63, dst -> next row
    bgt                 rows64_next
    pop                 {r4-r5, pc}
    ENDP
83
84    END
85