1#------------------------------------------------------------------------------
2#
3# CopyMem() worker for ARM
4#
5# This file started out as C code that did 64 bit moves if the buffer was
6# 32-bit aligned, else it does a byte copy. It also does a byte copy for
7# any trailing bytes. It was updated to do 32-byte copies using stm/ldm.
8#
9# Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>
10# Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
11# This program and the accompanying materials
12# are licensed and made available under the terms and conditions of the BSD License
13# which accompanies this distribution.  The full text of the license may be found at
14# http://opensource.org/licenses/bsd-license.php
15#
16# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
17# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
18#
19#------------------------------------------------------------------------------
20
21    .text
22    .thumb
23    .syntax unified
24
25/**
26  Copy Length bytes from Source to Destination. Overlap is OK.
27
28  This implementation
29
30  @param  Destination Target of copy
31  @param  Source      Place to copy from
32  @param  Length      Number of bytes to copy
33
34  @return Destination
35
36
37VOID *
38EFIAPI
39InternalMemCopyMem (
40  OUT     VOID                      *DestinationBuffer,
41  IN      CONST VOID                *SourceBuffer,
42  IN      UINTN                     Length
43  )
44**/
45ASM_GLOBAL ASM_PFX(InternalMemCopyMem)
46ASM_PFX(InternalMemCopyMem):
47    push    {r4-r11, lr}
48    // Save the input parameters in extra registers (r11 = destination, r14 = source, r12 = length)
49    mov     r11, r0
50    mov     r10, r0
51    mov     r12, r2
52    mov     r14, r1
53
54    cmp     r11, r1
55    // If (dest < source)
56    bcc     memcopy_check_optim_default
57
58    // If (source + length < dest)
59    rsb     r3, r1, r11
60    cmp     r12, r3
61    bcc     memcopy_check_optim_default
62    b       memcopy_check_optim_overlap
63
64memcopy_check_optim_default:
65    // Check if we can use an optimized path ((length >= 32) && destination word-aligned && source word-aligned) for the memcopy (optimized path if r0 == 1)
66    tst     r0, #0xF
67    it      ne
68    movne.n r0, #0
69    bne     memcopy_default
70    tst     r1, #0xF
71    it      ne
72    movne.n r3, #0
73    it      eq
74    moveq.n r3, #1
75    cmp     r2, #31
76    it      ls
77    movls.n r0, #0
78    bls     memcopy_default
79    and     r0, r3, #1
80    b       memcopy_default
81
82memcopy_check_optim_overlap:
83    // r10 = dest_end, r14 = source_end
84    add     r10, r11, r12
85    add     r14, r12, r1
86
87    // Are we in the optimized case ((length >= 32) && dest_end word-aligned && source_end word-aligned)
88    cmp     r2, #31
89    it      ls
90    movls.n r0, #0
91    it      hi
92    movhi.n r0, #1
93    tst     r10, #0xF
94    it      ne
95    movne.n r0, #0
96    tst     r14, #0xF
97    it      ne
98    movne.n r0, #0
99    b       memcopy_overlapped
100
101memcopy_overlapped_non_optim:
102    // We read 1 byte from the end of the source buffer
103    sub     r3, r14, #1
104    sub     r12, r12, #1
105    ldrb    r3, [r3, #0]
106    sub     r2, r10, #1
107    cmp     r12, #0
108    // We write 1 byte at the end of the dest buffer
109    sub     r10, r10, #1
110    sub     r14, r14, #1
111    strb    r3, [r2, #0]
112    bne     memcopy_overlapped_non_optim
113    b       memcopy_end
114
115// r10 = dest_end, r14 = source_end
116memcopy_overlapped:
117    // Are we in the optimized case ?
118    cmp     r0, #0
119    beq     memcopy_overlapped_non_optim
120
121    // Optimized Overlapped - Read 32 bytes
122    sub     r14, r14, #32
123    sub     r12, r12, #32
124    cmp     r12, #31
125    ldmia   r14, {r2-r9}
126
127    // If length is less than 32 then disable optim
128    it      ls
129    movls.n r0, #0
130
131    cmp     r12, #0
132
133    // Optimized Overlapped - Write 32 bytes
134    sub     r10, r10, #32
135    stmia   r10, {r2-r9}
136
137    // while (length != 0)
138    bne     memcopy_overlapped
139    b       memcopy_end
140
141memcopy_default_non_optim:
142    // Byte copy
143    ldrb    r3, [r14], #1
144    sub     r12, r12, #1
145    strb    r3, [r10], #1
146
147memcopy_default:
148    cmp     r12, #0
149    beq     memcopy_end
150
151// r10 = dest, r14 = source
152memcopy_default_loop:
153    cmp     r0, #0
154    beq     memcopy_default_non_optim
155
156    // Optimized memcopy - Read 32 Bytes
157    sub     r12, r12, #32
158    cmp     r12, #31
159    ldmia   r14!, {r2-r9}
160
161    // If length is less than 32 then disable optim
162    it      ls
163    movls.n r0, #0
164
165    cmp     r12, #0
166
167    // Optimized memcopy - Write 32 Bytes
168    stmia   r10!, {r2-r9}
169
170    // while (length != 0)
171    bne     memcopy_default_loop
172
173memcopy_end:
174    mov     r0, r11
175    pop     {r4-r11, pc}
176