/* memset.S revision ce46f5576ad0c9aefd842492949c4d2965e23e89 */
/*
 * Copyright (C) 2013 The Android Open Source Project
 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/cpu-features.h>
#include <private/bionic_asm.h>
#include <private/libc_events.h>

        /*
         * Optimized memset() for ARM.
         *
         * memset() returns its first argument.
         *
         * Three entry points share one body: __memset_chk and bzero set up
         * (or validate) the arguments and then run straight into memset.
         */

        .cpu        cortex-a15
        .fpu        neon
        .syntax     unified

        /*
         * __memset_chk: size-checked front end for memset.
         *
         * In:  r0 = destination, r1 = fill value, r2 = byte count,
         *      r3 = limit the count is checked against (presumably the
         *           size of the destination buffer - confirm with callers).
         *
         * If r2 <= r3 (unsigned) control continues at .L_done, which runs
         * into memset with r0-r2 untouched.  Otherwise __fortify_chk_fail is
         * called with r0 = address of error_string and
         * r1 = BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW.
         */
ENTRY(__memset_chk)
        cmp         r2, r3
        bls         .L_done

        // Preserve lr for backtrace.  r3 is pushed with it only as padding:
        // the procedure call standard wants sp 8-byte aligned at the call
        // below, and pushing lr alone would leave it 4-byte aligned.
        push        {r3, lr}
        .cfi_def_cfa_offset 8
        .cfi_rel_offset lr, 4

        // r0 = offset of error_string relative to (1b + 8), r1 = event code.
        ldr         r0, error_message
        ldr         r1, error_code
1:
        // Turn the offset into an absolute address.  In ARM state pc reads
        // as the address of this instruction plus 8, which is exactly the
        // bias subtracted in error_message below.
        // NOTE(review): in Thumb state the bias would be 4, not 8 - confirm
        // this file is only ever assembled as ARM.
        add         r0, pc
        // No code follows the call (the literal words come next), so
        // __fortify_chk_fail is expected not to return.
        bl          __fortify_chk_fail
error_code:
        .word       BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
        .word       error_string-(1b+8)
END(__memset_chk)

        /*
         * bzero: zero a buffer by rewriting the arguments into memset form.
         *
         * In:  r0 = destination, r1 = byte count.
         * The count is moved to r2 and the fill value in r1 becomes 0.
         */
ENTRY(bzero)
        mov         r2, r1
        mov         r1, #0
.L_done:
        // Fall through to memset...
END(bzero)

        /*
         * memset: fill r2 bytes starting at r0 with the low byte of r1.
         *
         * In:    r0 = destination, r1 = fill value, r2 = byte count
         * Out:   r0 = destination (r0 is only read, never written)
         * Uses:  r1, r2, r3, ip, q0-q3 and the condition flags
         */
ENTRY(memset)
        pldw        [r0]
        mov         r3, r0

        // Duplicate the low byte of r1 into all four bytes of r1.
        mov         r1, r1, lsl #24
        orr         r1, r1, r1, lsr #8
        orr         r1, r1, r1, lsr #16

        cmp         r2, #16
        blo         .L_less_than_16

        // This section handles regions 16 bytes or larger
        //
        // Use aligned vst1.8 and vstm when possible.  Register values will be:
        //   ip is scratch
        //   q0, q1, and r1 contain the memset value
        //   r2 is the number of bytes to set
        //   r3 is the advancing destination pointer
        vdup.32     q0, r1

        ands        ip, r3, #0xF
        beq         .L_memset_aligned

        // Align dest pointer to 16-byte boundary.
        // ip becomes the number of bytes (1..15) needed to reach it.
        pldw        [r0, #64]
        rsb         ip, ip, #16

        // Pre-adjust the byte count to reflect post-alignment value.  Expecting
        // 8-byte alignment to be rather common so we special case that one.
        // r2 >= 16 here and ip <= 15, so the subtraction cannot wrap.
        sub         r2, r2, ip

        /* set 1 byte */
        tst         ip, #1
        it          ne
        strbne      r1, [r3], #1
        /* set 2 bytes */
        tst         ip, #2
        it          ne
        strhne      r1, [r3], #2
        /* set 4 bytes */
        // After the shift: N = bit 2 of ip (4 bytes), C = bit 3 (8 bytes).
        // The stores do not touch the flags, so both tests use this movs.
        movs        ip, ip, lsl #29
        it          mi
        strmi       r1, [r3], #4
        /* set 8 bytes */
        itt         cs
        strcs       r1, [r3], #4
        strcs       r1, [r3], #4

.L_memset_aligned:
        // Destination is now 16-byte aligned.  Determine how to handle
        // remaining bytes.
        vmov        q1, q0
        cmp         r2, #128
        blo         .L_less_than_128

        // We need to set a larger block of memory.  Use four Q regs to
        // set a full cache line in one instruction.  Pre-decrement
        // r2 to simplify end-of-loop detection
        vmov        q2, q0
        vmov        q3, q0
        pldw        [r0, #128]
        sub         r2, r2, #128
        .align 4
.L_memset_loop_128:
        // Each iteration stores 2 x 64 = 128 bytes and loops while the
        // biased count has not gone below zero (no borrow from subs).
        pldw        [r3, #192]
        vstm        r3!, {q0, q1, q2, q3}
        vstm        r3!, {q0, q1, q2, q3}
        subs        r2, r2, #128
        bhs         .L_memset_loop_128

        // Un-bias r2 so it contains the number of bytes left.  Early
        // exit if we are done.
        adds        r2, r2, #128
        beq         2f

        .align 4
.L_less_than_128:
        // Fewer than 128 bytes remain; r3 is still 16-byte aligned.  Each
        // movs below shifts two size bits of r2 into C and N, and Z tells
        // whether any lower size bits remain.  The NEON stores leave the
        // flags alone, so one movs serves several branches.

        // set 64 bytes
        movs        ip, r2, lsl #26         // C = bit 6 (64), N = bit 5 (32)
        bcc         1f
        vst1.8      {q0, q1}, [r3, :128]!
        vst1.8      {q0, q1}, [r3, :128]!
        beq         2f                      // nothing below 64 left
1:
        // set 32 bytes
        bpl         1f
        vst1.8      {q0, q1}, [r3, :128]!
1:
        // set 16 bytes
        movs        ip, r2, lsl #28         // C = bit 4 (16), N = bit 3 (8)
        bcc         1f
        vst1.8      {q0}, [r3, :128]!
        beq         2f                      // nothing below 16 left
1:
        // set 8 bytes
        bpl         1f
        vst1.8      {d0}, [r3, :64]!
1:
        // set 4 bytes
        tst         r2, #4
        it          ne
        strne       r1, [r3], #4
        // set 2 bytes
        movs        ip, r2, lsl #31         // C = bit 1 (2), N = bit 0 (1)
        it          cs
        strhcs      r1, [r3], #2
        // set 1 byte
        it          mi
        strbmi      r1, [r3]
2:
        bx          lr

.L_less_than_16:
        // Store up to 15 bytes without worrying about byte alignment
        // (word stores are issued to r3 whatever its alignment is).
        movs        ip, r2, lsl #29         // C = bit 3 (8), N = bit 2 (4)
        bcc         1f
        str         r1, [r3], #4
        str         r1, [r3], #4
        beq         2f                      // count was exactly 8
1:
        it          mi
        strmi       r1, [r3], #4
        movs        ip, r2, lsl #31         // C = bit 1 (2), N = bit 0 (1)
        it          mi
        strbmi      r1, [r3], #1
        itt         cs
        strbcs      r1, [r3], #1
        strbcs      r1, [r3]
2:
        bx          lr
END(memset)

        .data
error_string:
        .string     "memset: prevented write past end of buffer"