memset.S revision acdde8c1cf8e8beed98c052757d96695b820b50c
/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/cpu-features.h>
#include <machine/asm.h>

/*
 * This code assumes it is running on a processor that supports all arm v7
 * instructions and that supports neon instructions.
 */

        .fpu        neon

/*
 * bzero
 *   In:  r0 = destination, r1 = byte count
 *
 * Moves the count into r2 and loads a zero fill value into r1, i.e. the
 * register layout memset expects. There is deliberately no return
 * instruction: execution falls through into memset, which must stay
 * immediately after this stub.
 * NOTE(review): this relies on ENTRY(memset) not emitting anything
 * executable-but-wrong (e.g. non-nop padding) between the two symbols --
 * confirm against the ENTRY macro in <machine/asm.h>.
 */
ENTRY(bzero)
        mov         r2, r1
        mov         r1, #0
        /* falls through into memset */
END(bzero)

/*
 * memset
 *   In:  r0 = destination, r1 = fill value (only the low byte is stored),
 *        r2 = byte count (unsigned)
 *   Out: r0 = original destination (memset() returns its first argument)
 *
 * Two implementations are selected by the byte count:
 *   - count <= 132: NEON stores, 32 bytes per iteration;
 *   - count  > 132: core-register stores, destination first brought to a
 *                   4-byte and then a 32-byte boundary, 32 bytes per
 *                   iteration.
 *
 * NOTE(review): the two paths each carry their own .save directive inside
 * a single ENTRY/END region; confirm the generated unwind information is
 * correct for both paths.
 */
ENTRY(memset)
        /* The neon memset only wins for less than 132. */
        cmp         r2, #132
        bhi         11f

        .save       {r0}
        stmfd       sp!, {r0}               /* keep the return value */

        vdup.8      q0, r1                  /* q0 = 16 copies of the fill byte */

        /* make sure we have at least 32 bytes to write */
        subs        r2, r2, #32
        blo         2f
        vmov        q1, q0                  /* d0-d3 = 32 bytes of fill */

1:      /* The main loop writes 32 bytes at a time */
        subs        r2, r2, #32
        vst1.8      {d0 - d3}, [r0]!
        bhs         1b

2:      /* less than 32 left; undo the bias so r2 = bytes remaining (0..31) */
        add         r2, r2, #32
        tst         r2, #0x10
        beq         3f

        /* writes 16 bytes */
        vst1.8      {d0, d1}, [r0]!
3:      /* write up to 15-bytes (count in r2) */
        movs        ip, r2, lsl #29         /* C = bit 3 (8 bytes), N = bit 2 (4 bytes) */
        bcc         1f
        vst1.8      {d0}, [r0]!
1:      bge         2f                      /* skip when bit 2 is clear (V is still clear from the subs above) */
        vst1.32     {d0[0]}, [r0]!
2:      movs        ip, r2, lsl #31         /* C = bit 1 (2 bytes), N = bit 0 (1 byte) */
        strmib      r1, [r0], #1
        strcsb      r1, [r0], #1
        strcsb      r1, [r0], #1
        ldmfd       sp!, {r0}
        bx          lr

11:
        /* compute the offset to align the destination
         * offset = (4-(dst&3))&3 = -dst & 3
         */

        .save       {r0, r4-r7, lr}
        stmfd       sp!, {r0, r4-r7, lr}
        rsb         r3, r0, #0
        ands        r3, r3, #3
        cmp         r3, r2
        movhi       r3, r2                  /* never write more than count bytes */

        /* splat r1: replicate the low byte into all four byte lanes */
        mov         r1, r1, lsl #24
        orr         r1, r1, r1, lsr #8
        orr         r1, r1, r1, lsr #16

        /* store 0..3 leading bytes: C = bit 1 of r3, N = bit 0 of r3 */
        movs        r12, r3, lsl #31
        strcsb      r1, [r0], #1            /* can't use strh (alignment unknown) */
        strcsb      r1, [r0], #1
        strmib      r1, [r0], #1
        subs        r2, r2, r3
        ldmlsfd     sp!, {r0, r4-r7, lr}    /* return */
        bxls        lr

        /*
         * align the destination to a cache-line (32-byte boundary);
         * r1, r4-r7, r12 and lr all hold the splatted fill word
         */
        mov         r12, r1
        mov         lr, r1
        mov         r4, r1
        mov         r5, r1
        mov         r6, r1
        mov         r7, r1

        rsb         r3, r0, #0
        ands        r3, r3, #0x1C           /* bytes (multiple of 4) up to the boundary */
        beq         3f
        cmp         r3, r2
        andhi       r3, r2, #0x1C           /* clamp to the whole words that remain */
        sub         r2, r2, r3

        /* conditionally writes 0 to 7 words (length in r3) */
        movs        r3, r3, lsl #28         /* C = bit 4 (16 bytes), N = bit 3 (8 bytes) */
        stmcsia     r0!, {r1, lr}
        stmcsia     r0!, {r1, lr}
        stmmiia     r0!, {r1, lr}
        movs        r3, r3, lsl #2          /* C = original bit 2 (4 bytes) */
        strcs       r1, [r0], #4

3:
        subs        r2, r2, #32
        mov         r3, r1
        /*
         * Unsigned test: the count is unsigned, so skip the loop only when
         * fewer than 32 bytes remain. A signed test (bmi) here would also
         * skip the loop for counts of 2GiB+32 and above, leaving all but the
         * tail unwritten.
         */
        blo         2f
1:      subs        r2, r2, #32
        stmia       r0!, {r1,r3,r4,r5,r6,r7,r12,lr}
        bhs         1b
2:      add         r2, r2, #32             /* r2 = bytes remaining (0..31) */

        /* conditionally stores 0 to 31 bytes */
        movs        r2, r2, lsl #28         /* C = bit 4 (16 bytes), N = bit 3 (8 bytes) */
        stmcsia     r0!, {r1,r3,r12,lr}
        stmmiia     r0!, {r1, lr}
        movs        r2, r2, lsl #2          /* C = original bit 2 (4 bytes), N = bit 1 (2 bytes) */
        strcs       r1, [r0], #4
        strmih      r1, [r0], #2
        movs        r2, r2, lsl #2          /* C = original bit 0 (1 byte) */
        strcsb      r1, [r0]
        ldmfd       sp!, {r0, r4-r7, lr}
        bx          lr
END(memset)