1233d2500723e5594f3e7c70896ffeeef32b9c950ywan;
2233d2500723e5594f3e7c70896ffeeef32b9c950ywan;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3233d2500723e5594f3e7c70896ffeeef32b9c950ywan;
4233d2500723e5594f3e7c70896ffeeef32b9c950ywan;  Use of this source code is governed by a BSD-style license
5233d2500723e5594f3e7c70896ffeeef32b9c950ywan;  that can be found in the LICENSE file in the root of the source
6233d2500723e5594f3e7c70896ffeeef32b9c950ywan;  tree. An additional intellectual property rights grant can be found
7233d2500723e5594f3e7c70896ffeeef32b9c950ywan;  in the file PATENTS.  All contributing project authors may
8233d2500723e5594f3e7c70896ffeeef32b9c950ywan;  be found in the AUTHORS file in the root of the source tree.
9233d2500723e5594f3e7c70896ffeeef32b9c950ywan;
10233d2500723e5594f3e7c70896ffeeef32b9c950ywan
11233d2500723e5594f3e7c70896ffeeef32b9c950ywan
12233d2500723e5594f3e7c70896ffeeef32b9c950ywan    EXPORT |vp8_memcpy_partial_neon|
13233d2500723e5594f3e7c70896ffeeef32b9c950ywan
14233d2500723e5594f3e7c70896ffeeef32b9c950ywan    ARM
15233d2500723e5594f3e7c70896ffeeef32b9c950ywan    REQUIRE8
16233d2500723e5594f3e7c70896ffeeef32b9c950ywan    PRESERVE8
17233d2500723e5594f3e7c70896ffeeef32b9c950ywan
18233d2500723e5594f3e7c70896ffeeef32b9c950ywan    AREA ||.text||, CODE, READONLY, ALIGN=2
;=========================================
; void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
;                              int sz);
;
; Forward copy of sz bytes from src_ptr to dst_ptr using NEON.
;
; ABI:      AAPCS (ARM state)
; In:       r0 = dst_ptr, r1 = src_ptr, r2 = sz
; Out:      none (r0/r1 are left advanced past the copied data)
; Clobbers: r3, r12, q0-q3, q8-q15, flags
;           q4-q7 (d8-d15) are callee-saved under AAPCS and are deliberately
;           not used, so no stack save/restore is needed.
; Stack:    not used
;
; Strategy: whole 256-byte blocks first, then 16-byte chunks, then single
; bytes.  sz <= 0 copies nothing.  No alignment is requested on the NEON
; loads/stores.  This is a forward copy only; overlapping buffers are not
; handled specially.
|vp8_memcpy_partial_neon| PROC
    ;pld                [r1]                        ;preload pred data
    ;pld                [r1, #128]
    ;pld                [r1, #256]
    ;pld                [r1, #384]

    cmp             r2, #0                          ;sz is a signed int:
    ble             done_copy_neon_loop             ;nothing to do for sz <= 0

    movs            r12, r2, lsr #8                 ;r12 = number of whole 256-byte blocks
    beq             memcpy_neon_tail                ;sz < 256: no full block to copy

    ;Each load is issued one register pair ahead of the matching store.
    ;q0-q3 are reused for the last two pairs once their first contents
    ;have been written out, which keeps the loop inside the caller-saved
    ;NEON registers.  The vld1/vst1 instructions do not touch the flags,
    ;so the result of the subs is still valid at the bne.
memcpy_neon_loop
    vld1.8          {q0, q1}, [r1]!                 ;load src data
    subs            r12, r12, #1                    ;one block fewer to go
    vld1.8          {q2, q3}, [r1]!
    vst1.8          {q0, q1}, [r0]!                 ;copy to dst_ptr
    vld1.8          {q8, q9}, [r1]!
    vst1.8          {q2, q3}, [r0]!
    vld1.8          {q10, q11}, [r1]!
    vst1.8          {q8, q9}, [r0]!
    vld1.8          {q12, q13}, [r1]!
    vst1.8          {q10, q11}, [r0]!
    vld1.8          {q14, q15}, [r1]!
    vst1.8          {q12, q13}, [r0]!
    vld1.8          {q0, q1}, [r1]!                 ;q0/q1 already stored: safe to reuse
    vst1.8          {q14, q15}, [r0]!
    vld1.8          {q2, q3}, [r1]!                 ;q2/q3 already stored: safe to reuse
    vst1.8          {q0, q1}, [r0]!
    vst1.8          {q2, q3}, [r0]!

    ;pld                [r1]                        ;preload pred data -- need to adjust for real device
    ;pld                [r1, #128]
    ;pld                [r1, #256]
    ;pld                [r1, #384]

    bne             memcpy_neon_loop

memcpy_neon_tail
    and             r3, r2, #0xff                   ;r3 = bytes left after the 256-byte blocks
    subs            r3, r3, #16                     ;bias by 16 so the sign tells us whether
    blt             memcpy_neon_bytes               ;a full 16-byte chunk remains

extra_copy_neon_loop
    vld1.8          {q0}, [r1]!                     ;load src data
    subs            r3, r3, #16
    vst1.8          {q0}, [r0]!
    bge             extra_copy_neon_loop            ;loop while >= 16 bytes are still left

memcpy_neon_bytes
    adds            r3, r3, #16                     ;undo the bias: r3 = 0..15 bytes left
    beq             done_copy_neon_loop

memcpy_neon_byte_loop
    ldrb            r12, [r1], #1                   ;r12 is free again: block count is spent
    subs            r3, r3, #1
    strb            r12, [r0], #1
    bne             memcpy_neon_byte_loop

done_copy_neon_loop
    bx              lr
    ENDP
69233d2500723e5594f3e7c70896ffeeef32b9c950ywan
70233d2500723e5594f3e7c70896ffeeef32b9c950ywan    END
71