/*
 * Copyright (C) 2013 The Android Open Source Project
 * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
295b5d6e7045dece4e112553e9a2516240ea32f812Shu Zhang
#include <machine/cpu-features.h>
#include <private/bionic_asm.h>
#include <private/libc_events.h>

        /*
         * Optimized memset() for ARM.
         *
         * This file defines three entry points that share one store
         * routine: __memset_chk, bzero and memset.
         *
         * memset() returns its first argument.
         */

        // The store paths use NEON instructions (vdup, vmov, vst1, vstm).
        .fpu        neon
        // Unified syntax is required for the IT blocks and for
        // condition-last mnemonics such as strbne / strhcs.
        .syntax     unified
425b5d6e7045dece4e112553e9a2516240ea32f812Shu Zhang
/*
 * __memset_chk: bounds-checked entry to memset.
 *
 * In:   r0, r1, r2 = the memset arguments (passed through untouched)
 *       r3         = limit that the byte count in r2 is checked against
 *                    (presumably the destination buffer size; confirm
 *                    against the C prototype)
 * Out:  when r2 <= r3 (unsigned) control continues into memset via
 *       .L_done, which is defined at the end of bzero.
 *       Otherwise __fortify_chk_fail is called with
 *         r0 = address of error_string
 *         r1 = BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
 */
ENTRY(__memset_chk)
        // Unsigned compare: count within the limit -> plain memset.
        cmp         r2, r3
        bls         .L_done

        // Preserve lr for backtrace.
        push        {lr}
        .cfi_def_cfa_offset 4
        .cfi_rel_offset lr, 0

        // r0 = offset of error_string relative to (1b + 8), r1 = event code.
        ldr         r0, error_message
        ldr         r1, error_code
1:
        // Turn the offset into an absolute address.  The "+8" in the
        // literal below matches the value pc reads as in ARM state.
        // NOTE(review): in Thumb state pc reads as +4; confirm this file
        // is always assembled as ARM.
        add         r0, pc
        // No instruction follows this call, only literal data, so
        // __fortify_chk_fail is presumably non-returning; confirm.
        bl          __fortify_chk_fail
error_code:
        .word       BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
        .word       error_string-(1b+8)
END(__memset_chk)
635b5d6e7045dece4e112553e9a2516240ea32f812Shu Zhang
/*
 * bzero: zero r1 bytes starting at r0.
 *
 * Rearranges its arguments into the register layout memset uses
 * (r1 = fill value, r2 = byte count) and then runs straight into
 * memset, which must be placed immediately after this function.
 */
ENTRY(bzero)
        mov         r2, r1              // byte count moves to r2
        mov         r1, #0              // fill value is zero
.L_done:
        // Also the branch target used by __memset_chk once its size
        // check has passed.
        // Fall through to memset...
END(bzero)
705b5d6e7045dece4e112553e9a2516240ea32f812Shu Zhang
/*
 * memset: store the low byte of r1 into r2 bytes starting at r0.
 *
 * In:    r0 = destination, r1 = fill value (only the low byte is used),
 *        r2 = byte count
 * Out:   r0 = destination (never modified; r3 is the working pointer)
 * Clobb: r1, r2, r3, ip, q0-q3, flags
 *
 * Counts below 16 take a short scalar path.  Larger counts first align
 * the destination to 16 bytes with scalar stores, then write 128 bytes
 * per loop iteration with NEON, then finish the remainder by testing
 * the bits of the remaining count from the largest chunk downwards.
 */
ENTRY(memset)
        pldw        [r0]
        mov         r3, r0

        // Duplicate the low byte of r1 into all four bytes of r1.
        mov         r1, r1, lsl #24
        orr         r1, r1, r1, lsr #8
        orr         r1, r1, r1, lsr #16

        cmp         r2, #16
        blo         .L_less_than_16

        // This section handles regions 16 bytes or larger
        //
        // Use aligned vst1.8 and vstm when possible.  Register values will be:
        //   ip is scratch
        //   q0, q1, and r1 contain the memset value
        //   r2 is the number of bytes to set
        //   r3 is the advancing destination pointer
        vdup.32     q0, r1

        ands        ip, r3, #0xF
        beq         .L_memset_aligned

        // Align dest pointer to 16-byte boundary.
        // ip becomes the number of bytes (1..15) needed to reach it.
        pldw        [r0, #64]
        rsb         ip, ip, #16

        // Pre-adjust the byte count to reflect the post-alignment value.
        // r2 >= 16 and ip <= 15 here, so the result stays positive.
        sub         r2, r2, ip

        // The bits of ip are consumed from least to most significant, so
        // every store below is naturally aligned for its width.

        /* set 1 byte */
        tst         ip, #1
        it          ne
        strbne      r1, [r3], #1
        /* set 2 bytes */
        tst         ip, #2
        it          ne
        strhne      r1, [r3], #2
        /* set 4 bytes */
        // One shift feeds both remaining steps: N = bit 2, C = bit 3 of ip.
        movs        ip, ip, lsl #29
        it          mi
        strmi       r1, [r3], #4
        /* set 8 bytes */
        itt         cs
        strcs       r1, [r3], #4
        strcs       r1, [r3], #4

.L_memset_aligned:
        // Destination is now 16-byte aligned.  Determine how to handle
        // remaining bytes.
        vmov        q1, q0
        cmp         r2, #128
        blo         .L_less_than_128

        // We need to set a larger block of memory.  Each vstm below writes
        // four Q registers (64 bytes).  Pre-decrement r2 to simplify
        // end-of-loop detection.
        vmov        q2, q0
        vmov        q3, q0
        pldw        [r0, #128]
        sub         r2, r2, #128
        .p2align 4
.L_memset_loop_128:
        // 128 bytes per iteration; loop while the biased count is >= 0.
        pldw        [r3, #192]
        vstm        r3!, {q0, q1, q2, q3}
        vstm        r3!, {q0, q1, q2, q3}
        subs        r2, r2, #128
        bhs         .L_memset_loop_128

        // Un-bias r2 so it contains the number of bytes left.  Early
        // exit if we are done.
        adds        r2, r2, #128
        beq         .L_memset_return

        .p2align 4
.L_less_than_128:
        // Here r2 < 128.  After the shift: C = bit 6 (64), N = bit 5 (32),
        // Z = no bytes remain below the 64-byte chunk.  The NEON stores
        // leave the flags untouched, so the later branches still see them.
        // set 64 bytes
        movs        ip, r2, lsl #26
        bcc         .L_set_32
        vst1.8      {q0, q1}, [r3, :128]!
        vst1.8      {q0, q1}, [r3, :128]!
        beq         .L_memset_return
.L_set_32:
        // set 32 bytes
        bpl         .L_set_16
        vst1.8      {q0, q1}, [r3, :128]!
.L_set_16:
        // After the shift: C = bit 4 (16), N = bit 3 (8),
        // Z = no bytes remain below the 16-byte chunk.
        // set 16 bytes
        movs        ip, r2, lsl #28
        bcc         .L_set_8
        vst1.8      {q0}, [r3, :128]!
        beq         .L_memset_return
.L_set_8:
        // set 8 bytes
        bpl         .L_set_4
        vst1.8      {d0}, [r3, :64]!
.L_set_4:
        // set 4 bytes
        tst         r2, #4
        it          ne
        strne       r1, [r3], #4
        // set 2 bytes (C = bit 1, N = bit 0 after the shift)
        movs        ip, r2, lsl #31
        it          cs
        strhcs      r1, [r3], #2
        // set 1 byte
        it          mi
        strbmi      r1, [r3]
.L_memset_return:
        bx          lr

.L_less_than_16:
        // Store up to 15 bytes without worrying about byte alignment.
        // NOTE(review): the word stores below may hit unaligned addresses;
        // this assumes unaligned word access is permitted on the target.
        // After the shift: C = bit 3 (8), N = bit 2 (4),
        // Z = no bytes remain below the 8-byte chunk.
        movs        ip, r2, lsl #29
        bcc         .L_small_tail
        str         r1, [r3], #4
        str         r1, [r3], #4
        beq         .L_small_return
.L_small_tail:
        // set 4 bytes
        it          mi
        strmi       r1, [r3], #4
        // N = bit 0 (one byte), C = bit 1 (two bytes)
        movs        ip, r2, lsl #31
        it          mi
        strbmi      r1, [r3], #1
        itt         cs
        strbcs      r1, [r3], #1
        strbcs      r1, [r3]
.L_small_return:
        bx          lr
END(memset)
2045b5d6e7045dece4e112553e9a2516240ea32f812Shu Zhang
// Message whose address __memset_chk passes to __fortify_chk_fail.
// Nothing in this file writes to it, so it is placed in read-only data
// rather than the writable .data section.  It is addressed through a
// PC-relative offset, which works the same from either section.
        .section    .rodata
error_string:
        .string     "memset: prevented write past end of buffer"
208