/*
 * Copyright (C) 2013 The Android Open Source Project
 * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/cpu-features.h>
#include <private/bionic_asm.h>
#include <private/libc_events.h>

        /*
         * Optimized memset() for ARM.
         *
         * memset() returns its first argument.
         *
         * Three entry points share one body:
         *   __memset_chk  checks r2 against r3, then continues into memset
         *   bzero         rewrites its arguments, then continues into memset
         *   memset        the actual fill routine
         *
         * The fill code only writes r1-r3, ip, q0-q3 and the flags; r0 is
         * never modified, so it is still the destination pointer on return.
         */

        .fpu        neon
        .syntax     unified

        /*
         * __memset_chk
         * In:  r0 = destination, r1 = fill value, r2 = byte count,
         *      r3 = limit the count is checked against (presumably the
         *           destination buffer size passed by the fortify caller --
         *           verify against callers)
         * If r2 <= r3 (unsigned) control continues at .L_done, i.e. into
         * memset with r0-r2 untouched. Otherwise __fortify_chk_fail is
         * called with r0 = address of error_string and
         * r1 = BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW.
         */
ENTRY(__memset_chk)
        cmp         r2, r3
        bls         .L_done

        // Preserve lr for backtrace.
        push        {lr}
        .cfi_def_cfa_offset 4
        .cfi_rel_offset lr, 0

        // r0 = pc-relative offset of error_string, r1 = event code.
        ldr         r0, error_message
        ldr         r1, error_code
1:
        // Turn the offset into an address. The "+8" in the error_message
        // literal must equal the amount by which pc reads ahead of label 1.
        // NOTE(review): that is 8 in ARM (A32) state and 4 in Thumb state --
        // confirm this file is assembled as ARM code.
        add         r0, pc
        // NOTE(review): the literal words below follow this call directly, so
        // __fortify_chk_fail is presumably expected never to return.
        bl          __fortify_chk_fail
error_code:
        .word       BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
        .word       error_string-(1b+8)
END(__memset_chk)

        /*
         * bzero
         * In:  r0 = destination, r1 = byte count
         * Moves the count into r2, sets the fill value r1 to 0 and continues
         * into memset.
         */
ENTRY(bzero)
        mov         r2, r1
        mov         r1, #0
.L_done:
        // Fall through to memset...
        // NOTE(review): this relies on END()/ENTRY() emitting nothing
        // executable other than alignment padding between the two
        // functions -- confirm in private/bionic_asm.h.
END(bzero)

        /*
         * memset
         * In:  r0 = destination, r1 = fill value (only the low byte is
         *      used), r2 = byte count
         * Out: r0 = destination (unchanged)
         * Writes: r1, r2, r3, ip, q0-q3, flags
         */
ENTRY(memset)
        pldw        [r0]
        mov         r3, r0

        // Duplicate the low byte of r1 into all four bytes of r1.
        mov         r1, r1, lsl #24
        orr         r1, r1, r1, lsr #8
        orr         r1, r1, r1, lsr #16

        cmp         r2, #16
        blo         .L_less_than_16

        // This section handles regions 16 bytes or larger
        //
        // Use aligned vst1.8 and vstm when possible.  Register values will be:
        //   ip is scratch
        //   q0, q1, and r1 contain the memset value
        //   r2 is the number of bytes to set
        //   r3 is the advancing destination pointer
        vdup.32     q0, r1

        ands        ip, r3, 0xF
        beq         .L_memset_aligned

        // Align dest pointer to 16-byte boundary.
        // ip = 16 - (r3 & 15) = number of bytes (1..15) up to the boundary.
        pldw        [r0, #64]
        rsb         ip, ip, #16

        // Pre-adjust the byte count to reflect post-alignment value.  Expecting
        // 8-byte alignment to be rather common so we special case that one.
        sub         r2, r2, ip

        /* set 1 byte */
        tst         ip, #1
        it          ne
        strbne      r1, [r3], #1
        /* set 2 bytes */
        tst         ip, #2
        it          ne
        strhne      r1, [r3], #2
        /* set 4 bytes */
        // After the shift: N = bit 2 of ip, C = bit 3 of ip.
        movs        ip, ip, lsl #29
        it          mi
        strmi       r1, [r3], #4
        /* set 8 bytes */
        itt         cs
        strcs       r1, [r3], #4
        strcs       r1, [r3], #4

.L_memset_aligned:
        // Destination is now 16-byte aligned.  Determine how to handle
        // remaining bytes.
        vmov        q1, q0
        cmp         r2, #128
        blo         .L_less_than_128

        // We need to set a larger block of memory.  Use four Q regs to
        // set a full cache line in one instruction.  Pre-decrement
        // r2 to simplify end-of-loop detection
        vmov        q2, q0
        vmov        q3, q0
        pldw        [r0, #128]
        sub         r2, r2, #128
        .align      4
.L_memset_loop_128:
        // Each iteration stores 2 x 64 = 128 bytes.
        pldw        [r3, #192]
        vstm        r3!, {q0, q1, q2, q3}
        vstm        r3!, {q0, q1, q2, q3}
        subs        r2, r2, #128
        bhs         .L_memset_loop_128

        // Un-bias r2 so it contains the number of bytes left.  Early
        // exit if we are done.
        adds        r2, r2, #128
        beq         2f

        .align      4
.L_less_than_128:
        // Fewer than 128 bytes remain; each bit of r2 selects one store size.
        // set 64 bytes
        // After the shift: C = bit 6, N = bit 5, Z = (bits 5..0 all zero).
        // The NEON stores leave the flags alone, so beq/bpl below still test
        // this movs.
        movs        ip, r2, lsl #26
        bcc         1f
        vst1.8      {q0, q1}, [r3, :128]!
        vst1.8      {q0, q1}, [r3, :128]!
        beq         2f
1:
        // set 32 bytes
        bpl         1f
        vst1.8      {q0, q1}, [r3, :128]!
1:
        // set 16 bytes
        // After the shift: C = bit 4, N = bit 3, Z = (bits 3..0 all zero).
        movs        ip, r2, lsl #28
        bcc         1f
        vst1.8      {q0}, [r3, :128]!
        beq         2f
1:
        // set 8 bytes
        bpl         1f
        vst1.8      {d0}, [r3, :64]!
1:
        // set 4 bytes
        tst         r2, #4
        it          ne
        strne       r1, [r3], #4
        // set 2 bytes
        // After the shift: C = bit 1, N = bit 0.
        movs        ip, r2, lsl #31
        it          cs
        strhcs      r1, [r3], #2
        // set 1 byte
        it          mi
        strbmi      r1, [r3]
2:
        bx          lr

.L_less_than_16:
        // Store up to 15 bytes without worrying about byte alignment
        // After the shift: C = bit 3, N = bit 2, Z = (bits 2..0 all zero).
        movs        ip, r2, lsl #29
        bcc         1f
        str         r1, [r3], #4
        str         r1, [r3], #4
        beq         2f
1:
        it          mi
        strmi       r1, [r3], #4
        // After the shift: C = bit 1, N = bit 0.
        movs        ip, r2, lsl #31
        it          mi
        strbmi      r1, [r3], #1
        itt         cs
        strbcs      r1, [r3], #1
        strbcs      r1, [r3]
2:
        bx          lr
END(memset)

        // Message whose address __memset_chk passes to __fortify_chk_fail.
        // NOTE(review): nothing in this file writes to the string, so it
        // could probably live in a read-only section instead of .data.
        .data
error_string:
        .string     "memset: prevented write past end of buffer"