/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.align	5

/*
 * void *memset(void *s, int c, size_t n)
 *
 * Syntax: GNU as, ARM state, pre-UAL mnemonics (condition code precedes
 *         the size/addressing suffix, e.g. stmgeia, strneb).
 *
 * In:     r0 = s   destination pointer (any alignment)
 *         r1 = c   fill value; only the low 8 bits are used
 *         r2 = n   number of bytes to fill
 * Out:    r0 = s   (never written, so it is returned unchanged)
 * Clobb:  r1, r2, r3, ip, flags
 * Stack:  r8/lr (or r4-r8/lr in the cache-line variant) are pushed and
 *         popped around the bulk loop only.
 *
 * Register roles once the bulk code is running:
 *         ip         running store pointer
 *         r1, r3     fill byte replicated into all four byte lanes
 *         r4-r8, lr  further copies of the fill word for multi-register
 *                    stores
 *         r2         bytes remaining; after a bulk loop it may have
 *                    gone negative, but its low bits still describe the
 *                    tail because only multiples of 4 or 64 were
 *                    subtracted from it
 *
 * Which bulk loop is assembled depends on CALGN() from <asm/assembler.h>
 * (definition not visible in this file): the first variant is used when
 * "! CALGN(1)+0" evaluates non-zero, the cache-line variant otherwise.
 */
ENTRY(memset)
	and	r1, r1, #255		@ c = (unsigned char)c: the orr
					@ replication below would otherwise
					@ smear any high bits into the word
	ands	r3, r0, #3		@ r3 = misalignment (0..3)
	mov	ip, r0			@ work on ip so r0 survives as the
					@ return value (mov keeps the flags)
	bne	6f			@ unaligned: fix up first
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
1:	orr	r1, r1, r1, lsl #8	@ 0x000000cc -> 0x0000cccc
	orr	r1, r1, r1, lsl #16	@            -> 0xcccccccc
	mov	r3, r1
	cmp	r2, #16
	blt	4f			@ under 16 bytes: tail code only

#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1

2:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmeqfd	sp!, {r8, pc}		@ count hit exactly 0: return here
/*
 * Now <64 bytes to go.
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r2, #32
	stmneia	ip!, {r1, r3, r8, lr}	@ 2 x 16 = 32 bytes
	stmneia	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmneia	ip!, {r1, r3, r8, lr}	@ 16 bytes
	ldmfd	sp!, {r8, lr}

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96			@ only bother aligning when n > 96 ...
	tstgt	ip, #31			@ ... and ip is not already on a
	ble	3f			@ 32-byte boundary

	and	r8, ip, #31
	rsb	r8, r8, #32		@ r8 = bytes up to the next boundary
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)	@ C = bit 4 (16), N = bit 3 (8)
	stmcsia	ip!, {r4, r5, r6, r7}	@ 16 bytes
	stmmiia	ip!, {r4, r5}		@ 8 bytes
	tst	r8, #(1 << 30)		@ original bit 2 (4)
	mov	r8, r1			@ restore fill word (flags untouched)
	strne	r1, [ip], #4		@ 4 bytes

3:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3-r8, lr}	@ 2 x 32 = 64 bytes per iteration
	stmgeia	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmeqfd	sp!, {r4-r8, pc}	@ count hit exactly 0: return here

	tst	r2, #32
	stmneia	ip!, {r1, r3-r8, lr}	@ 32 bytes
	tst	r2, #16
	stmneia	ip!, {r4-r7}		@ 16 bytes
	ldmfd	sp!, {r4-r8, lr}

#endif

4:	tst	r2, #8
	stmneia	ip!, {r1, r3}		@ 8 bytes
	tst	r2, #4
	strne	r1, [ip], #4		@ 4 bytes
/*
 * When we get here, we've got less than 4 bytes to set.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strneb	r1, [ip], #1
	strneb	r1, [ip], #1
	tst	r2, #1
	strneb	r1, [ip], #1
	mov	pc, lr

/*
 * Unaligned start: r3 = ip & 3 (1..3).  Store 4 - r3 single bytes to
 * reach a word boundary, then rejoin the aligned code at 1.
 */
6:	subs	r2, r2, #4		@ do we have enough
	blt	5b			@ bytes to align with?  If not, the
					@ low two bits of r2 are unchanged,
					@ so the byte tail handles them.
	cmp	r3, #2
	strltb	r1, [ip], #1		@ r3 == 1: three bytes in total
	strleb	r1, [ip], #1		@ r3 <= 2: at least two bytes
	strb	r1, [ip], #1		@ always at least one byte
	add	r2, r2, r3		@ (r2 = r2 - (4 - r3))
	b	1b
ENDPROC(memset)