strcpy.S revision 1c8ea807ebb54c1533040b60c0a6394abc77e339
/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <private/bionic_asm.h>

    .syntax unified

    .thumb
    .thumb_func

    // Prologue: save the incoming dst pointer (r0) together with r4, r5 and
    // lr. r0 is post-incremented while copying, so the saved copy is what
    // m_ret hands back to the caller.
    .macro m_push
    push    {r0, r4, r5, lr}
    .cfi_def_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r4, 4
    .cfi_rel_offset r5, 8
    .cfi_rel_offset lr, 12
    .endm // m_push

    // Epilogue: reload the saved dst into r0, restore r4/r5 and return by
    // popping the saved lr into pc. \inst is the pop mnemonic, so callers
    // can pass a conditional form (popne, popcs, popeq) inside an IT block.
    .macro m_ret inst
    \inst   {r0, r4, r5, pc}
    .endm // m_ret

    // Copy one byte from [r1] to [r0], post-incrementing both pointers, then
    // apply \cmd (cbz or cbnz) to the copied byte with target \label.
    .macro m_copy_byte reg, cmd, label
    ldrb    \reg, [r1], #1
    strb    \reg, [r0], #1
    \cmd    \reg, \label
    .endm // m_copy_byte

// ---------------------------------------------------------------------------
// strcpy
//
// In:    r0 = dst (bytes are stored through it)
//        r1 = src (bytes are loaded through it until a zero byte is found)
// Out:   r0 = the dst pointer as it was on entry (reloaded by m_ret)
// Saved: r4, r5 and lr are pushed by m_push and restored by m_ret.
// Used:  r2, r3, ip and the condition flags are overwritten; lr is also used
//        as scratch once it has been saved.
//
// Zero-byte detection used throughout:
//     ip = (x - 0x01010101) & ~x & 0x80808080
// is non-zero exactly when the 32-bit word x contains a zero byte, and the
// lowest set marker bit (bit 7, 15, 23 or 31) identifies the first such byte.
// The tail-store sequences treat the least significant byte of a loaded word
// as the first byte in memory (little-endian layout).
// ---------------------------------------------------------------------------
ENTRY(strcpy)
    // Unroll the first 8 bytes that will be copied.
    m_push
    m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r5, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r5, cmd=cbnz, label=.Lstrcpy_continue

.Lstrcpy_finish:
    m_ret   inst=pop

.Lstrcpy_continue:
    pld     [r1, #0]
    // r3 = dst & 7; non-zero means dst still has to be aligned.
    ands    r3, r0, #7
    bne     .Lstrcpy_align_dst

.Lstrcpy_check_src_align:
    // At this point dst is aligned to a double word, check if src
    // is also aligned to a double word.
    ands    r3, r1, #7
    bne     .Lstrcpy_unaligned_copy

    .p2align 2
.Lstrcpy_mainloop:
    // Both pointers are double-word aligned: move 8 bytes per iteration.
    ldmia   r1!, {r2, r3}

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_mainloop

.Lstrcpy_zero_in_first_register:
    // r2 holds the word containing the terminator, ip its marker bits.
    // After the shift: Z clear => marker at bit 7 (zero is byte 0),
    //                  C set   => marker at bit 15 (zero is byte 1).
    lsls    lr, ip, #17
    itt     ne
    strbne  r2, [r0]
    m_ret   inst=popne
    itt     cs
    strhcs  r2, [r0]
    m_ret   inst=popcs
    // Bits 7 and 15 are known clear here, so the shifted value is zero only
    // when bit 23 is clear as well, i.e. the terminator is byte 3 and the
    // whole word is stored. Otherwise the terminator is byte 2.
    lsls    ip, ip, #1
    itt     eq
    streq   r2, [r0]
    m_ret   inst=popeq
    strh    r2, [r0], #2
    lsr     r3, r2, #16
    strb    r3, [r0]
    m_ret   inst=pop

.Lstrcpy_zero_in_second_register:
    // Same decoding as above, but r2 is a complete word that is stored
    // first and r3 is the word containing the terminator.
    lsls    lr, ip, #17
    ittt    ne
    stmiane r0!, {r2}
    strbne  r3, [r0]
    m_ret   inst=popne
    ittt    cs
    strcs   r2, [r0], #4
    strhcs  r3, [r0]
    m_ret   inst=popcs
    lsls    ip, ip, #1
    itt     eq
    stmiaeq r0, {r2, r3}
    m_ret   inst=popeq
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r4, r3, #16
    strb    r4, [r0]
    m_ret   inst=pop

.Lstrcpy_align_dst:
    // Align to a double word (64 bits).
    // r3 = 8 - (dst & 7): the number of bytes (1..7) still to copy.
    rsb     r3, r3, #8
    // Z set => bit 0 of r3 is clear (no single byte to copy);
    // C     =  bit 1 of r3 (tested by the bcc below; the intervening
    //          ldrb/strb/cbz leave the flags untouched).
    lsls    ip, r3, #31
    beq     .Lstrcpy_align_to_32

    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstrcpy_complete

.Lstrcpy_align_to_32:
    bcc     .Lstrcpy_align_to_64

    ldrb    r4, [r1], #1
    strb    r4, [r0], #1
    cmp     r4, #0
    it      eq
    m_ret   inst=popeq
    ldrb    r5, [r1], #1
    strb    r5, [r0], #1
    cmp     r5, #0
    it      eq
    m_ret   inst=popeq

.Lstrcpy_align_to_64:
    tst     r3, #4
    beq     .Lstrcpy_check_src_align
    // Copy the four bytes one at a time. The alignment of src has not been
    // examined on this path, so a 32-bit load could extend past the
    // terminator into a following page that the string does not occupy.
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstrcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstrcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstrcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstrcpy_complete
    b       .Lstrcpy_check_src_align

.Lstrcpy_complete:
    m_ret   inst=pop

.Lstrcpy_unaligned_copy:
    // Dst is aligned to a double word, while src is at an unknown alignment.
    // There are 7 different versions of the unaligned copy code
    // to prevent overreading the src. The mainloop of every single version
    // will store 64 bits per loop. The difference is how much of src can
    // be read without potentially crossing a page boundary.
    //
    // r3 = src & 7 and is non-zero on this path, so entry 0 of the table is
    // never selected; entry k dispatches to .Lstrcpy_unalign(8 - k).
    tbb     [pc, r3]
.Lstrcpy_unaligned_branchtable:
    .byte 0
    .byte ((.Lstrcpy_unalign7 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign6 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign5 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign4 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign3 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign2 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign1 - .Lstrcpy_unaligned_branchtable)/2)

    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
.Lstrcpy_unalign7:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    // Bytes 4..6 are probed individually before the second word is loaded.
    ldrb    r3, [r1]
    cbz     r3, .Lstrcpy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, .Lstrcpy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, .Lstrcpy_unalign7_copy7bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Only byte 7 (the top byte of r3) can still be the terminator.
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
    beq     .Lstrcpy_unalign_return
    b       .Lstrcpy_unalign7

.Lstrcpy_unalign7_copy5bytes:
    stmia   r0!, {r2}
    strb    r3, [r0]
.Lstrcpy_unalign_return:
    m_ret   inst=pop

.Lstrcpy_unalign7_copy6bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    m_ret   inst=pop

.Lstrcpy_unalign7_copy7bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0], #1
    m_ret   inst=pop

    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
.Lstrcpy_unalign6:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    ldrb    r4, [r1]
    cbz     r4, .Lstrcpy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, .Lstrcpy_unalign_copy6bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 4 and 5 are known non-zero; test byte 6, then byte 7.
    tst     r3, #0xff0000
    beq     .Lstrcpy_unalign6_copy7bytes
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
    beq     .Lstrcpy_unalign_return
    b       .Lstrcpy_unalign6

.Lstrcpy_unalign6_copy7bytes:
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
.Lstrcpy_unalign5:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    ldrb    r4, [r1]
    cbz     r4, .Lstrcpy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_unalign5

.Lstrcpy_unalign_copy5bytes:
    stmia   r0!, {r2}
    strb    r4, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign_copy6bytes:
    stmia   r0!, {r2}
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
.Lstrcpy_unalign4:
    ldmia   r1!, {r2}

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    ldmia   r1!, {r3}
    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_unalign4

    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
.Lstrcpy_unalign3:
    // Bytes 0..2 are probed individually before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstrcpy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstrcpy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, .Lstrcpy_unalign3_copy3bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Only byte 3 of the first word can still be the terminator.
    lsrs    lr, r2, #24
    beq     .Lstrcpy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_unalign3

.Lstrcpy_unalign3_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign3_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign3_copy3bytes:
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
.Lstrcpy_unalign2:
    ldrb    r2, [r1]
    cbz     r2, .Lstrcpy_unalign_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstrcpy_unalign_copy2bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 are known non-zero; test byte 2, then byte 3.
    tst     r2, #0xff0000
    beq     .Lstrcpy_unalign_copy3bytes
    lsrs    ip, r2, #24
    beq     .Lstrcpy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_unalign2

    .p2align 2
    // Can read 1 byte before possibly crossing a page.
.Lstrcpy_unalign1:
    ldrb    r2, [r1]
    cbz     r2, .Lstrcpy_unalign_copy1byte

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_unalign1

.Lstrcpy_unalign_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign_copy3bytes:
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign_copy4bytes:
    stmia   r0, {r2}
    m_ret   inst=pop
END(strcpy)