/***************************************************************************
 Copyright (c) 2009,2010, Code Aurora Forum. All rights reserved.

 Licensed under the Apache License, Version 2.0 (the "License"); you
 may not use this file except in compliance with the License.  You may
 obtain a copy of the License at

 http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 implied.  See the License for the specific language governing
 permissions and limitations under the License.
 ***************************************************************************/

/*
 * Syntax: GNU as, ARM (A32) state, NEON required.
 * NOTE(review): .type/.size below assume an ELF target - confirm.
 */

        .text
        .code   32
        .fpu    neon
        .align  4
        .globl  memset32_neon
        .type   memset32_neon, %function    /* lets the linker treat it as code (interworking) */
        .func   memset32_neon

/*-----------------------------------------------------------------------
 * memset32_neon - fill a buffer with a repeated 32-bit value.
 *
 * Presumably: void memset32_neon(uint32_t *dst, uint32_t value, int count)
 *             (TODO confirm against the caller's declaration)
 *
 * In:    r0 = buffer (assumed word-aligned - confirm with callers)
 *        r1 = value
 *        r2 = times to write (number of 32-bit words); treated as signed,
 *             any count <= 0 writes nothing
 * Out:   nothing meaningful; r0 is left advanced past part of the buffer
 * Clobb: r0, r2, r12, q0, q1, flags
 *
 * Strategy: counts of 1 and 2..4 are handled with plain stores. Larger
 * counts are written with NEON in chunks of 128, 64, 32 and 16 words
 * (32 bytes per vst1), and the final 0..15 words are written by jumping
 * into the middle of a ladder of 15 str instructions.
 *-----------------------------------------------------------------------*/
memset32_neon:
        cmp     r2, #1
        bxlt    lr                          /* count <= 0: nothing to do */
        streq   r1, [r0], #4                /* count == 1: single store  */
        bxeq    lr

        cmp     r2, #4
        bgt     memset32_neon_start

        /* Here 2 <= count <= 4: simple word-at-a-time loop. */
memset32_neon_small:
        str     r1, [r0], #4
        subs    r2, r2, #1
        bne     memset32_neon_small
        bx      lr

        /* Here count >= 5. */
memset32_neon_start:
        cmp     r2, #16
        blt     memset32_dropthru           /* 5..15 words: str ladder only */

        vdup.32 q0, r1                      /* q0 = q1 = value in every lane */
        vmov    q1, q0

        /* Enter the chunk cascade at the largest chunk that fits. */
        cmp     r2, #32
        blt     memset32_16
        cmp     r2, #64
        blt     memset32_32
        cmp     r2, #128
        blt     memset32_64

        /* count >= 128: r12 = number of full 128-word (512-byte) chunks. */
memset32_128:
        movs    r12, r2, lsr #7
memset32_loop128:
        subs    r12, r12, #1                /* vst1 leaves flags alone for the bne below */
        /*
         * vst1.32 rather than vst1.64: the bytes written are identical
         * (all lanes hold the same word) but only word alignment is
         * required when strict alignment checking is enabled.
         */
        .rept   16
        vst1.32 {q0, q1}, [r0]!
        .endr
        bne     memset32_loop128
        ands    r2, r2, #0x7f               /* r2 = words still to write (< 128) */
        bxeq    lr

        /* r2 < 128: at most one 64-word (256-byte) chunk. */
memset32_64:
        movs    r12, r2, lsr #6
        beq     memset32_32
        .rept   8
        vst1.32 {q0, q1}, [r0]!
        .endr
        ands    r2, r2, #0x3f               /* r2 < 64 */
        bxeq    lr

        /* r2 < 64: at most one 32-word (128-byte) chunk. */
memset32_32:
        movs    r12, r2, lsr #5
        beq     memset32_16
        .rept   4
        vst1.32 {q0, q1}, [r0]!
        .endr
        ands    r2, r2, #0x1f               /* r2 < 32 */
        bxeq    lr

        /* r2 < 32: at most one 16-word (64-byte) chunk. */
memset32_16:
        movs    r12, r2, lsr #4
        beq     memset32_dropthru
        and     r2, r2, #0xf                /* r2 < 16 */
        .rept   2
        vst1.32 {q0, q1}, [r0]!
        .endr

        /*
         * Tail: 0 <= r2 <= 15 words remain at r0.
         * Computed jump into the store ladder: in ARM state pc reads as
         * the address of the add plus 8, i.e. the first str below (the
         * nop is the padding that makes this so). Skipping (15 - r2)
         * instructions leaves exactly r2 stores, or lands on bx lr when
         * r2 == 0. Do not insert or remove instructions between the add
         * and the bx lr.
         */
memset32_dropthru:
        rsb     r2, r2, #15
        add     pc, pc, r2, lsl #2
        nop
        str     r1, [r0, #56]
        str     r1, [r0, #52]
        str     r1, [r0, #48]
        str     r1, [r0, #44]
        str     r1, [r0, #40]
        str     r1, [r0, #36]
        str     r1, [r0, #32]
        str     r1, [r0, #28]
        str     r1, [r0, #24]
        str     r1, [r0, #20]
        str     r1, [r0, #16]
        str     r1, [r0, #12]
        str     r1, [r0, #8]
        str     r1, [r0, #4]
        str     r1, [r0, #0]
        bx      lr

        .size   memset32_neon, . - memset32_neon
        .endfunc
        .end