/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/cpu-features.h>
#include <private/bionic_asm.h>
#include <private/libc_events.h>

        /*
         * Optimized memset() for ARM.
         *
         * memset() returns its first argument.
         *
         * Layout note: __memset_chk, bzero and memset are three entry
         * points into one body of code.  __memset_chk branches to, and
         * bzero falls through, the shared .L_done label, which sits
         * immediately before memset.  The three must stay adjacent and
         * in this order.
         */

        .fpu        neon
        .syntax     unified

/*
 * __memset_chk -- memset with a bound check on the byte count.
 *
 * In:    r0 = destination, r1 = fill value, r2 = byte count
 *        (all three are passed on to memset untouched),
 *        r3 = upper bound for r2 (presumably the known size of the
 *             destination object -- confirm against callers).
 * Out:   as memset when r2 <= r3 (unsigned).
 *        Otherwise calls __fortify_chk_fail(message, event code); the
 *        words following the call are data, so it must not return.
 */
ENTRY(__memset_chk)
        cmp         r2, r3
        bls         .L_done

        // Preserve lr for backtrace.
        push        {lr}
        .cfi_def_cfa_offset 4
        .cfi_rel_offset lr, 0

        // r0 = pc-relative offset of the message, r1 = event code.
        ldr         r0, error_message
        ldr         r1, error_code
1:
        // Turn the offset into an absolute address.  The "+8" baked
        // into error_message is the distance between this instruction
        // and the value pc yields when it executes.
        add         r0, pc
        bl          __fortify_chk_fail
error_code:
        .word       BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
        .word       error_string-(1b+8)
END(__memset_chk)

/*
 * bzero -- zero a buffer.
 *
 * In:    r0 = destination, r1 = byte count.
 * Rearranges the arguments into memset's convention (r1 = 0,
 * r2 = count) and falls through into memset.
 */
ENTRY(bzero)
        mov         r2, r1
        mov         r1, #0
.L_done:
        // Fall through to memset...
END(bzero)

/*
 * memset -- fill r2 bytes at r0 with the low byte of r1.
 *
 * In:    r0 = destination, r1 = fill value, r2 = byte count.
 * Out:   r0 = original destination (saved on the stack on entry and
 *        reloaded before each return).
 * Clobb: r1, r2, r3, ip, flags; d0 on the "fewer than 16 bytes" path.
 * Stack: 4 bytes (the saved r0).
 */
ENTRY(memset)
        stmfd       sp!, {r0}
        .cfi_def_cfa_offset 4
        .cfi_rel_offset r0, 0

        // The new algorithm is slower for copies < 16 so use the old
        // neon code in that case.
        cmp         r2, #16
        blo         .L_set_less_than_16_unknown_align

        // Use strd which requires an even and odd register so move the
        // values so that:
        //   r0 and r1 contain the memset value
        //   r2 is the number of bytes to set
        //   r3 is the destination pointer
        mov         r3, r0

        // Copy the byte value in every byte of r1.
        mov         r1, r1, lsl #24         // r1 = b000 (upper input bits dropped)
        orr         r1, r1, r1, lsr #8      // r1 = bb00
        orr         r1, r1, r1, lsr #16     // r1 = bbbb

.L_check_alignment:
        // Align destination to a double word to avoid the strd crossing
        // a cache line boundary.
        ands        ip, r3, #7
        bne         .L_do_double_word_align

.L_double_word_aligned:
        mov         r0, r1

        // From here until .L_set_less_than_64, r2 holds
        // (bytes remaining - 64).
        subs        r2, #64
        blo         .L_set_less_than_64

1:      // Main loop sets 64 bytes at a time.
        .irp        offset, #0, #8, #16, #24, #32, #40, #48, #56
        strd        r0, r1, [r3, \offset]
        .endr

        add         r3, #64
        subs        r2, #64
        // Unsigned test (no borrow), matching the blo above.  A signed
        // bge here would leave the loop early for counts >= 2^31.
        bhs         1b

.L_set_less_than_64:
        // Restore r2 to the count of bytes left to set.
        add         r2, #64
        // C = bit 5 of the count (32), N = bit 4 (16).  None of the
        // stores or adds below touch the flags, so both survive until
        // they are tested.
        lsls        ip, r2, #27
        bcc         .L_set_less_than_32
        // Set 32 bytes.
        .irp        offset, #0, #8, #16, #24
        strd        r0, r1, [r3, \offset]
        .endr
        add         r3, #32

.L_set_less_than_32:
        bpl         .L_set_less_than_16
        // Set 16 bytes.
        .irp        offset, #0, #8
        strd        r0, r1, [r3, \offset]
        .endr
        add         r3, #16

.L_set_less_than_16:
        // Less than 16 bytes to set.
        // C = bit 3 of the count (8), N = bit 2 (4).
        lsls        ip, r2, #29
        bcc         .L_set_less_than_8

        // Set 8 bytes.
        strd        r0, r1, [r3], #8

.L_set_less_than_8:
        bpl         .L_set_less_than_4
        // Set 4 bytes
        str         r1, [r3], #4

.L_set_less_than_4:
        // C = bit 1 of the count (2); Z is clear when bit 0 (1) is set.
        lsls        ip, r2, #31
        it          ne
        strbne      r1, [r3], #1
        itt         cs
        strbcs      r1, [r3], #1
        strbcs      r1, [r3]

        ldmfd       sp!, {r0}
        bx          lr

.L_do_double_word_align:
        // ip = number of bytes (1..7) needed to reach 8-byte alignment.
        // The count is at least 16 on this path, so r2 stays positive.
        rsb         ip, ip, #8
        sub         r2, r2, ip
        // N = bit 0 of ip (1 byte), C = bit 1 (2 bytes).  r0 is only a
        // scratch destination here; it is reloaded with the fill
        // pattern at .L_double_word_aligned.
        movs        r0, ip, lsl #31
        it          mi
        strbmi      r1, [r3], #1
        itt         cs
        strbcs      r1, [r3], #1
        strbcs      r1, [r3], #1

        // Dst is at least word aligned by this point.
        cmp         ip, #4
        blo         .L_double_word_aligned
        str         r1, [r3], #4
        b           .L_double_word_aligned

.L_set_less_than_16_unknown_align:
        // Set up to 15 bytes.
        // r0 is used directly as the write pointer on this path and is
        // reloaded from the stack before returning.
        vdup.8      d0, r1
        // C = bit 3 of the count (8), N = bit 2 (4).
        movs        ip, r2, lsl #29
        bcc         1f
        vst1.8      {d0}, [r0]!
        // Test N directly (bpl) rather than N == V (bge), so the result
        // does not depend on V left over from the earlier cmp.
1:      bpl         2f
        vst1.32     {d0[0]}, [r0]!
        // N = bit 0 of the count (1), C = bit 1 (2).
2:      movs        ip, r2, lsl #31
        it          mi
        strbmi      r1, [r0], #1
        itt         cs
        strbcs      r1, [r0], #1
        strbcs      r1, [r0], #1
        ldmfd       sp!, {r0}
        bx          lr
END(memset)

        // The message is never written, so keep it out of writable
        // memory.  It is reached pc-relatively from __memset_chk.
        .section    .rodata
error_string:
        .string     "memset: prevented write past end of buffer"