1acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris/*
2acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * Copyright (C) 2008 The Android Open Source Project
3acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * All rights reserved.
4acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris *
5acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * Redistribution and use in source and binary forms, with or without
6acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * modification, are permitted provided that the following conditions
7acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * are met:
8acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris *  * Redistributions of source code must retain the above copyright
9acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris *    notice, this list of conditions and the following disclaimer.
10acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris *  * Redistributions in binary form must reproduce the above copyright
11acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris *    notice, this list of conditions and the following disclaimer in
12acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris *    the documentation and/or other materials provided with the
13acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris *    distribution.
14acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris *
15acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris * SUCH DAMAGE.
27acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris */
28acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
29acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris#include <machine/cpu-features.h>
30acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris#include <machine/asm.h>
317c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferris#include "libc_events.h"
32acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
33acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /*
34acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * Optimized memcpy() for ARM.
35acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         *
36acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * note that memcpy() always returns the destination pointer,
37acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * so we have to preserve R0.
38acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         */
39acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
/*
 * __memcpy_chk: length-checked entry point placed directly in front of
 * memcpy().
 *
 * In:  r2 = number of bytes to copy (the length memcpy() consumes below)
 *      r3 = limit the length is checked against -- presumably the size of
 *           the destination object supplied by the fortify caller; confirm
 *           against the C prototype.
 *      r0, r1 are not touched here and reach memcpy() unchanged.
 *
 * If r2 > r3 control transfers to fortify_check_failed (defined outside
 * this block); otherwise execution falls through into memcpy().
 *
 * Both values are byte counts and therefore unsigned, so the branch must
 * use the unsigned condition (HI). A signed GT would let a length with the
 * top bit set slip through as "negative", and would reject ordinary copies
 * whenever the limit is 0xFFFFFFFF (which reads as -1 when signed).
 *
 * Clobbers: condition flags only.
 */
ENTRY(__memcpy_chk)
        cmp         r2, r3
        bhi         fortify_check_failed    /* length > limit (unsigned) */

        // Fall through to memcpy...
END(__memcpy_chk)
467c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferris
47acdde8c1cf8e8beed98c052757d96695b820b50cChristopher FerrisENTRY(memcpy)
48acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* The stack must always be 64-bits aligned to be compliant with the
49acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * ARM ABI. Since we have to save R0, we might as well save R4
50acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * which we can use for better pipelining of the reads below
51acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         */
52acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        .save       {r0, r4, lr}
53acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        stmfd       sp!, {r0, r4, lr}
54acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* Making room for r5-r11 which will be spilled later */
55acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        .pad        #28
56acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        sub         sp, sp, #28
57acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
58acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        // preload the destination because we'll align it to a cache line
59acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        // with small writes. Also start the source "pump".
60acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        PLD         (r0, #0)
61acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        PLD         (r1, #0)
62acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        PLD         (r1, #32)
63acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
64acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* it simplifies things to take care of len<4 early */
65acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        cmp         r2, #4
66acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        blo         copy_last_3_and_return
67acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
68acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* compute the offset to align the source
69acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * offset = (4-(src&3))&3 = -src & 3
70acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         */
71acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        rsb         r3, r1, #0
72acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ands        r3, r3, #3
73acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        beq         src_aligned
74acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
75acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* align source to 32 bits. We need to insert 2 instructions between
76acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * a ldr[b|h] and str[b|h] because byte and half-word instructions
77acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * stall 2 cycles.
78acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         */
79acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        movs        r12, r3, lsl #31
80acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        sub         r2, r2, r3      /* we know that r3 <= r2 because r2 >= 4 */
81acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldrmib      r3, [r1], #1
82acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldrcsb      r4, [r1], #1
83acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldrcsb      r12,[r1], #1
84acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strmib      r3, [r0], #1
85acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strcsb      r4, [r0], #1
86acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strcsb      r12,[r0], #1
87acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
88acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferrissrc_aligned:
89acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
90acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* see if src and dst are aligned together (congruent) */
91acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        eor         r12, r0, r1
92acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        tst         r12, #3
93acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        bne         non_congruent
94acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
        /* Use post-increment mode for stm to spill r5-r11 to reserved stack
         * frame. Don't update sp.
         */
98acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        stmea       sp, {r5-r11}
99acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
100acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* align the destination to a cache-line */
101acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        rsb         r3, r0, #0
102acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ands        r3, r3, #0x1C
103acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        beq         congruent_aligned32
104acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        cmp         r3, r2
105acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        andhi       r3, r2, #0x1C
106acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
107acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* conditionally copies 0 to 7 words (length in r3) */
108acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        movs        r12, r3, lsl #28
109acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldmcsia     r1!, {r4, r5, r6, r7}   /* 16 bytes */
110acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldmmiia     r1!, {r8, r9}           /*  8 bytes */
111acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        stmcsia     r0!, {r4, r5, r6, r7}
112acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        stmmiia     r0!, {r8, r9}
113acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        tst         r3, #0x4
114acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldrne       r10,[r1], #4            /*  4 bytes */
115acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strne       r10,[r0], #4
116acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        sub         r2, r2, r3
117acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
118acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferriscongruent_aligned32:
        /*
         * here the destination is aligned to a 32-byte cache line, unless
         * fewer than 4 bytes are left to copy (the source is word-aligned).
         */
122acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
123acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferriscached_aligned32:
124acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        subs        r2, r2, #32
125acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        blo         less_than_32_left
126acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
        /*
         * We preload a cache-line up to 64 bytes ahead. On the 926, this will
         * stall only until the requested word is fetched, but the linefill
         * continues in the background.
         * While the linefill is going, we write our previous cache-line
         * into the write-buffer (which should have some free space).
         * When the linefill is done, the writebuffer will
         * start dumping its content into memory
         *
         * While all this is going, we then load a full cache line into
         * 8 registers, this cache line should be in the cache by now
         * (or partly in the cache).
         *
         * This code should work well regardless of the source/dest alignment.
         *
         */
143acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
144acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        // Align the preload register to a cache-line because the cpu does
145acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        // "critical word first" (the first word requested is loaded first).
146acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        bic         r12, r1, #0x1F
147acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        add         r12, r12, #64
148acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
149acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris1:      ldmia       r1!, { r4-r11 }
150acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        PLD         (r12, #64)
151acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        subs        r2, r2, #32
152acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
153acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        // NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
154acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        // for ARM9 preload will not be safely guarded by the preceding subs.
155acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        // When it is safely guarded the only possibility to have SIGSEGV here
156acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        // is because the caller overstates the length.
157acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldrhi       r3, [r12], #32      /* cheap ARM9 preload */
158acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        stmia       r0!, { r4-r11 }
159acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        bhs         1b
160acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
161acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        add         r2, r2, #32
162acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
163acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
164acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
165acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
166acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferrisless_than_32_left:
167acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /*
168acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * less than 32 bytes left at this point (length in r2)
169acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         */
170acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
171acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* skip all this if there is nothing to do, which should
172acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * be a common case (if not executed the code below takes
173acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * about 16 cycles)
174acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         */
175acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        tst         r2, #0x1F
176acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        beq         1f
177acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
        /* conditionally copies 0 to 31 bytes */
179acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        movs        r12, r2, lsl #28
180acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldmcsia     r1!, {r4, r5, r6, r7}   /* 16 bytes */
181acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldmmiia     r1!, {r8, r9}           /*  8 bytes */
182acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        stmcsia     r0!, {r4, r5, r6, r7}
183acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        stmmiia     r0!, {r8, r9}
184acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        movs        r12, r2, lsl #30
185acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldrcs       r3, [r1], #4            /*  4 bytes */
186acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldrmih      r4, [r1], #2            /*  2 bytes */
187acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strcs       r3, [r0], #4
188acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strmih      r4, [r0], #2
189acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        tst         r2, #0x1
190acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldrneb      r3, [r1]                /*  last byte  */
191acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strneb      r3, [r0]
192acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
193acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* we're done! restore everything and return */
194acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris1:      ldmfd       sp!, {r5-r11}
195acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldmfd       sp!, {r0, r4, lr}
196acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        bx          lr
197acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
198acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /********************************************************************/
199acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
200acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferrisnon_congruent:
201acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /*
202acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * here source is aligned to 4 bytes
203acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * but destination is not.
204acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         *
205acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * in the code below r2 is the number of bytes read
206acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * (the number of bytes written is always smaller, because we have
207acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * partial words in the shift queue)
208acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         */
209acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        cmp         r2, #4
210acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        blo         copy_last_3_and_return
211acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
        /* Use post-increment mode for stm to spill r5-r11 to reserved stack
         * frame. Don't update sp.
         */
215acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        stmea       sp, {r5-r11}
216acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
217acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* compute shifts needed to align src to dest */
218acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        rsb         r5, r0, #0
219acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        and         r5, r5, #3          /* r5 = # bytes in partial words */
220acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r12, r5, lsl #3     /* r12 = right */
221acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        rsb         lr, r12, #32        /* lr = left  */
222acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
223acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* read the first word */
224acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldr         r3, [r1], #4
225acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        sub         r2, r2, #4
226acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
        /* write a partial word (0 to 3 bytes), such that destination
         * becomes aligned to 32 bits (r5 = nb of bytes to copy for alignment)
         */
230acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        movs        r5, r5, lsl #31
231acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strmib      r3, [r0], #1
232acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        movmi       r3, r3, lsr #8
233acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strcsb      r3, [r0], #1
234acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        movcs       r3, r3, lsr #8
235acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strcsb      r3, [r0], #1
236acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        movcs       r3, r3, lsr #8
237acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
238acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        cmp         r2, #4
239acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        blo         partial_word_tail
240acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
241acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* Align destination to 32 bytes (cache line boundary) */
242acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris1:      tst         r0, #0x1c
243acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        beq         2f
244acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldr         r5, [r1], #4
245acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        sub         r2, r2, #4
246acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r4, r3, r5,     lsl lr
247acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r3, r5,         lsr r12
248acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        str         r4, [r0], #4
249acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        cmp         r2, #4
250acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        bhs         1b
251acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        blo         partial_word_tail
252acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
253acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* copy 32 bytes at a time */
254acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris2:      subs        r2, r2, #32
255acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        blo         less_than_thirtytwo
256acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
257acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* Use immediate mode for the shifts, because there is an extra cycle
258acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * for register shifts, which could account for up to 50% of
259acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         * performance hit.
260acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris         */
261acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
262acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        cmp         r12, #24
263acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        beq         loop24
264acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        cmp         r12, #8
265acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        beq         loop8
266acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
267acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferrisloop16:
268acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldr         r12, [r1], #4
269acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris1:      mov         r4, r12
270acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldmia       r1!, {   r5,r6,r7,  r8,r9,r10,r11}
271acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        PLD         (r1, #64)
272acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        subs        r2, r2, #32
273acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldrhs       r12, [r1], #4
274acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r3, r3, r4,     lsl #16
275acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r4, r4,         lsr #16
276acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r4, r4, r5,     lsl #16
277acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r5, r5,         lsr #16
278acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r5, r5, r6,     lsl #16
279acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r6, r6,         lsr #16
280acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r6, r6, r7,     lsl #16
281acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r7, r7,         lsr #16
282acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r7, r7, r8,     lsl #16
283acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r8, r8,         lsr #16
284acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r8, r8, r9,     lsl #16
285acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r9, r9,         lsr #16
286acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r9, r9, r10,    lsl #16
287acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r10, r10,       lsr #16
288acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r10, r10, r11,  lsl #16
289acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        stmia       r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
290acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r3, r11,        lsr #16
291acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        bhs         1b
292acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        b           less_than_thirtytwo
293acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
294acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferrisloop8:
295acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldr         r12, [r1], #4
296acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris1:      mov         r4, r12
297acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldmia       r1!, {   r5,r6,r7,  r8,r9,r10,r11}
298acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        PLD         (r1, #64)
299acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        subs        r2, r2, #32
300acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldrhs       r12, [r1], #4
301acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r3, r3, r4,     lsl #24
302acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r4, r4,         lsr #8
303acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r4, r4, r5,     lsl #24
304acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r5, r5,         lsr #8
305acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r5, r5, r6,     lsl #24
306acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r6, r6,         lsr #8
307acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r6, r6, r7,     lsl #24
308acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r7, r7,         lsr #8
309acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r7, r7, r8,     lsl #24
310acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r8, r8,         lsr #8
311acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r8, r8, r9,     lsl #24
312acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r9, r9,         lsr #8
313acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r9, r9, r10,    lsl #24
314acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r10, r10,       lsr #8
315acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r10, r10, r11,  lsl #24
316acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        stmia       r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
317acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r3, r11,        lsr #8
318acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        bhs         1b
319acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        b           less_than_thirtytwo
320acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
/*
 * loop24: bulk loop of the shifted word copy. Each pass reads eight source
 * words and writes eight destination words (32 bytes), with the shifts
 * hard-coded as left 8 and right 24.
 *
 * Registers as used by the visible code:
 *   r0      destination pointer, advanced 32 bytes per pass by the stmia
 *   r1      source pointer, advanced by every post-indexed load
 *   r2      byte counter, reduced by 32 per pass
 *   r3      carry-over bits merged into the first output word; after each
 *           pass it holds the top byte of r11 in bits 0-7
 *   r4-r11  the eight source words of the current pass
 *   r12     first source word of the current pass, loaded one pass ahead
 *
 * NOTE(review): the dispatch into this label is outside this view. It
 * presumably picks this loop when lr is 8 and r12 is 24, with r3 already
 * primed and 32 already subtracted from r2 once. TODO confirm.
 */
321acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferrisloop24:
        /* Prime r12 with the first source word of the first pass. */
322acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldr         r12, [r1], #4
        /* Top of the pass: move the prefetched word into r4, load seven more. */
323acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris1:      mov         r4, r12
324acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldmia       r1!, {   r5,r6,r7,  r8,r9,r10,r11}
        /* PLD is a macro defined outside this view, presumably a preload hint. */
325acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        PLD         (r1, #64)
        /*
         * Account for the 32 bytes of this pass and set the flags. The orr,
         * mov and stmia instructions below have no S suffix, so these flags
         * are still the ones tested by the bhs at the bottom of the loop.
         */
326acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        subs        r2, r2, #32
        /* Only when the subs did not borrow: fetch the first word of the next pass. */
327acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldrhs       r12, [r1], #4
        /*
         * Shift chain. Each output word is the carry-over from the previous
         * source word in bits 0-7 combined with the low 24 bits of the next
         * source word in bits 8-31. Each mov then keeps the top byte of that
         * source word as the carry-over for the following output word.
         */
328acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r3, r3, r4,     lsl #8
329acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r4, r4,         lsr #24
330acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r4, r4, r5,     lsl #8
331acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r5, r5,         lsr #24
332acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r5, r5, r6,     lsl #8
333acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r6, r6,         lsr #24
334acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r6, r6, r7,     lsl #8
335acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r7, r7,         lsr #24
336acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r7, r7, r8,     lsl #8
337acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r8, r8,         lsr #24
338acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r8, r8, r9,     lsl #8
339acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r9, r9,         lsr #24
340acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r9, r9, r10,    lsl #8
341acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r10, r10,       lsr #24
342acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r10, r10, r11,  lsl #8
        /* Write the eight assembled words and advance r0 by 32 bytes. */
343acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        stmia       r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
        /* The top byte of the last source word becomes the next carry-over. */
344acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r3, r11,        lsr #24
        /*
         * Repeat while the subs above did not borrow. Otherwise execution
         * falls through to less_than_thirtytwo with r12 holding source data
         * rather than a shift count.
         */
345acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        bhs         1b
346acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
347acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
/*
 * less_than_thirtytwo: word-at-a-time continuation of the shifted copy.
 * Unlike the bulk loops it uses register-specified shifts: lr is the left
 * shift count and r12 the right shift count, with r12 set to 32 minus lr
 * by the rsb below. r3 carries the pending bits between words.
 * Falls through to partial_word_tail once fewer than 4 bytes remain in r2.
 */
348acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferrisless_than_thirtytwo:
349acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* copy the last 0 to 31 bytes of the source */
350acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        rsb         r12, lr, #32        /* we corrupted r12, recompute it  */
        /*
         * Add back the 32 that the bulk loop subtracted on its final,
         * borrowing subs, so r2 is again a plain count of bytes still to read.
         */
351acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        add         r2, r2, #32
        /* Fewer than one whole source word left: skip the word loop. */
352acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        cmp         r2, #4
353acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        blo         partial_word_tail
354acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
        /*
         * Word loop: read one source word into r5, emit r3 combined with the
         * low part of r5 shifted left by lr, then keep the high part of r5
         * (shifted right by r12) in r3 as the pending bits for the next word.
         */
355acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris1:      ldr         r5, [r1], #4
356acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        sub         r2, r2, #4
357acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        orr         r4, r3, r5,     lsl lr
358acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        mov         r3, r5,         lsr r12
359acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        str         r4, [r0], #4
        /* Keep going while at least one more whole word remains. */
360acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        cmp         r2, #4
361acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        bhs         1b
362acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
/*
 * partial_word_tail: write out the bytes still pending in r3, one byte at a
 * time, then reload r5-r11 from the stack. Falls through to
 * copy_last_3_and_return.
 *
 * The movs shifts lr left by 28, so N becomes bit 3 of lr and C becomes
 * bit 4 of lr. With lr equal to 8, 16 or 24 this stores 1, 2 or 3 bytes:
 *   MI (lr bit 3 set)  one byte
 *   CS (lr bit 4 set)  two bytes
 * The conditional str and mov instructions do not change the flags.
 */
363acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferrispartial_word_tail:
364acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* we have a partial word in the input buffer */
        /* r5 is only a scratch destination here; it is reloaded by the ldmfd below. */
365acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        movs        r5, lr, lsl #(31-3)
        /* MI: store the low byte of r3, then shift the next byte down. */
366acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strmib      r3, [r0], #1
367acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        movmi       r3, r3, lsr #8
        /* CS: store two consecutive bytes of r3. */
368acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strcsb      r3, [r0], #1
369acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        movcs       r3, r3, lsr #8
370acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strcsb      r3, [r0], #1
371acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
372acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* Refill spilled registers from the stack. Don't update sp. */
        /*
         * No writeback on sp: the seven words stay allocated until the add
         * in copy_last_3_and_return releases them.
         * NOTE(review): the matching spill is outside this view. TODO confirm.
         */
373acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldmfd       sp, {r5-r11}
374acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
/*
 * copy_last_3_and_return: copy the final 0 to 3 bytes selected by the low
 * two bits of r2 from the source at r1 to the destination at r0, then
 * release the stack frame and return through lr.
 *
 * The movs shifts r2 left by 31, so N becomes bit 0 of r2 and C becomes
 * bit 1 of r2:
 *   MI (bit 0 set)  copy one byte
 *   CS (bit 1 set)  copy two bytes
 * All loads are issued before the stores. The last load and store of the
 * pair use no post-increment since the pointers are not used again.
 */
375acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferriscopy_last_3_and_return:
376acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        movs        r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
        /* r2, r3 and r12 are reused as byte temporaries from here on. */
377acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldrmib      r2, [r1], #1
378acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldrcsb      r3, [r1], #1
379acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldrcsb      r12,[r1]
380acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strmib      r2, [r0], #1
381acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strcsb      r3, [r0], #1
382acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        strcsb      r12,[r0]
383acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris
384acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        /* we're done! restore sp and spilled registers and return */
        /*
         * 28 bytes is seven words, the size of the r5-r11 area that
         * partial_word_tail reloads from sp without writeback.
         */
385acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        add         sp,  sp, #28
        /*
         * Pop r0, r4 and lr.
         * NOTE(review): the matching push is outside this view. The r0 popped
         * here is presumably the original destination pointer, returned as
         * the memcpy result. TODO confirm against the prologue.
         */
386acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        ldmfd       sp!, {r0, r4, lr}
387acdde8c1cf8e8beed98c052757d96695b820b50cChristopher Ferris        bx          lr
3887c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferris
/*
 * fortify_check_failed: set up the two arguments and call
 * __fortify_chk_fail.
 *   r0  address of error_string, rebuilt position-independently
 *   r1  BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW, loaded from the error_code word
 *
 * The error_message word holds error_string minus (label 1 plus 8). Adding
 * pc at label 1 therefore yields the absolute address of error_string,
 * provided pc reads as the instruction address plus 8.
 * NOTE(review): that pc offset holds in ARM state. Confirm this file is
 * never assembled as Thumb, where pc reads as the address plus 4.
 */
3897c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferris        // Only reached when the __memcpy_chk check fails.
3907c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferrisfortify_check_failed:
        /* Both loads read the literal words placed after the bl below. */
3917c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferris        ldr     r0, error_message
3927c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferris        ldr     r1, error_code
3937c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferris1:
        /* Turn the stored offset into the address of error_string. */
3947c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferris        add     r0, pc
        /*
         * Only literal data follows this call, so __fortify_chk_fail
         * presumably does not return. TODO confirm.
         */
3957c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferris        bl      __fortify_chk_fail
3967c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferriserror_code:
3977c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferris        .word   BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
3987c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferriserror_message:
3997c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferris        .word   error_string-(1b+8)
400acdde8c1cf8e8beed98c052757d96695b820b50cChristopher FerrisEND(memcpy)
4017c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferris
/*
 * error_string: NUL-terminated message whose address fortify_check_failed
 * passes in r0 to __fortify_chk_fail.
 * NOTE(review): the visible code only takes the address of this string and
 * never writes to it, so a read-only section may be a better home than
 * .data. Confirm before moving it.
 */
4027c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferris        .data
4037c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferriserror_string:
4047c860db0747f6276a6e43984d43f8fa5181ea936Christopher Ferris        .string     "memcpy buffer overflow"
405