vp8_memcpy_neon.asm revision f71323e297a928af368937089d3ed71239786f86
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_memcpy_neon|

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2
;=========================================
;void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz);
;
; Copies sz bytes from src_ptr to dst_ptr, walking both buffers forward.
;
; In:    r0 = dst_ptr
;        r1 = src_ptr
;        r2 = sz, consumed as an unsigned 32-bit byte count (logical shift
;             and mask below), so callers must not pass a negative value.
; Out:   nothing (r0/r1 are left pointing one past the copied data)
; Clobb: r0, r1, r3, r12, q0-q3, q8-q15, flags
;
; The copy runs in three phases:
;   1. whole 256-byte blocks,
;   2. whole 16-byte chunks of the remainder,
;   3. the last 0..15 bytes one at a time.
; Any sz (including 0 and values that are not a multiple of 16) copies
; exactly sz bytes.
;
; q4-q7 (d8-d15) are callee-saved under the AAPCS and are deliberately not
; used, so no stack save/restore is required.
;
; The NEON loads/stores carry no alignment qualifier, so neither pointer
; needs to be aligned.
; NOTE(review): source and destination are presumably non-overlapping;
; overlap is not handled -- confirm against callers.
|vp8_memcpy_neon| PROC
    ;pld        [r1]                        ;preload pred data
    ;pld        [r1, #128]
    ;pld        [r1, #256]
    ;pld        [r1, #384]

    movs        r12, r2, lsr #8             ;r12 = number of 256-byte blocks
    beq         memcpy_neon_remainder       ;fewer than 256 bytes: skip block loop

; Invariant: r12 = 256-byte blocks still to copy, always > 0 on entry.
; Loads and stores are interleaved; the NEON transfers do not touch the
; flags, so the result of the early subs is still valid at the bne.
memcpy_neon_loop
    vld1.8      {q0, q1}, [r1]!             ;load src data
    subs        r12, r12, #1
    vld1.8      {q2, q3}, [r1]!
    vst1.8      {q0, q1}, [r0]!             ;copy to dst_ptr
    vld1.8      {q8, q9}, [r1]!
    vst1.8      {q2, q3}, [r0]!
    vld1.8      {q10, q11}, [r1]!
    vst1.8      {q8, q9}, [r0]!
    vld1.8      {q12, q13}, [r1]!
    vst1.8      {q10, q11}, [r0]!
    vld1.8      {q14, q15}, [r1]!
    vst1.8      {q12, q13}, [r0]!
    vld1.8      {q0, q1}, [r1]!             ;q0-q3 already stored: safe to reuse
    vst1.8      {q14, q15}, [r0]!
    vld1.8      {q2, q3}, [r1]!
    vst1.8      {q0, q1}, [r0]!
    vst1.8      {q2, q3}, [r0]!

    ;pld        [r1]                        ;preload pred data -- need to adjust for real device
    ;pld        [r1, #128]
    ;pld        [r1, #256]
    ;pld        [r1, #384]

    bne         memcpy_neon_loop

memcpy_neon_remainder
    and         r3, r2, #0xff               ;r3 = bytes left after the blocks (0..255)
    subs        r3, r3, #16                 ;bias by -16: r3 >= 0 <=> a full chunk remains
    blt         memcpy_neon_tail_bytes

; Invariant: r3 = (bytes left - 16) and is >= 0 on entry.
extra_copy_neon_loop
    vld1.8      {q0}, [r1]!                 ;load src data
    subs        r3, r3, #16
    vst1.8      {q0}, [r0]!
    bge         extra_copy_neon_loop

memcpy_neon_tail_bytes
    adds        r3, r3, #16                 ;undo the bias: r3 = bytes left (0..15)
    beq         done_copy_neon_loop

; Invariant: r3 = bytes left, always > 0 on entry. r12 is free scratch here.
memcpy_neon_byte_loop
    ldrb        r12, [r1], #1
    subs        r3, r3, #1
    strb        r12, [r0], #1
    bne         memcpy_neon_byte_loop

done_copy_neon_loop
    bx          lr
    ENDP

    END