/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Shared implementation of strcpy and stpcpy, selected at build time:
 * the stpcpy variant is emitted when STPCPY is defined, the strcpy
 * variant otherwise (one of STPCPY / STRCPY must be defined).
 *
 * Syntax: GNU as, unified syntax, Thumb.
 *
 * In:      r0 = dst, r1 = src
 * Out:     strcpy: r0 = the dst value passed in (saved by m_push and
 *                  reloaded by m_ret)
 *          stpcpy: r0 = address of the NUL terminator written to dst
 * Scratch: r1, r2, r3, ip and lr are overwritten. r4 and r5 are used as
 *          scratch but saved by m_push and restored by m_ret.
 * Stack:   12 bytes (stpcpy) or 16 bytes (strcpy) for the saved registers.
 *
 * Byte order: the low byte of a loaded word is treated as the
 * lowest-addressed byte throughout (little-endian).
 */

#if !defined(STPCPY) && !defined(STRCPY)
#error "Either STPCPY or STRCPY must be defined."
#endif

#include <private/bionic_asm.h>

    .syntax unified

    .thumb
    .thumb_func

// m_push: save the registers that must survive the routine and describe
// the resulting frame with CFI directives. The strcpy variant also saves
// r0 so that m_ret can hand the incoming dst back as the return value.
#if defined(STPCPY)
    .macro m_push
    push    {r4, r5, lr}
    .cfi_def_cfa_offset 12
    .cfi_rel_offset r4, 0
    .cfi_rel_offset r5, 4
    .cfi_rel_offset lr, 8
    .endm // m_push
#else
    .macro m_push
    push    {r0, r4, r5, lr}
    .cfi_def_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r4, 4
    .cfi_rel_offset r5, 8
    .cfi_rel_offset lr, 12
    .endm // m_push
#endif

// m_ret: undo m_push and return by loading the saved lr into pc.
// The pop mnemonic is a parameter so that a conditional form
// (popne / popcs / popeq) can be used inside an IT block.
#if defined(STPCPY)
    .macro m_ret inst
    \inst   {r4, r5, pc}
    .endm // m_ret
#else
    .macro m_ret inst
    \inst   {r0, r4, r5, pc}
    .endm // m_ret
#endif

// m_copy_byte: copy one byte from [r1] to [r0] through \reg, advancing
// both pointers by one, then apply \cmd (cbz or cbnz) to the copied byte
// with \label as the branch target.
    .macro m_copy_byte reg, cmd, label
    ldrb    \reg, [r1], #1
    strb    \reg, [r0], #1
    \cmd    \reg, \label
    .endm // m_copy_byte

#if defined(STPCPY)
ENTRY(stpcpy)
#else
ENTRY(strcpy)
#endif
    // Unroll the first 8 bytes that will be copied.
    // Each of the first seven copies leaves through .Lstringcopy_finish
    // when the byte is NUL; the eighth continues only when it is not NUL
    // and otherwise falls through into .Lstringcopy_finish.
    m_push
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r5, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r2, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstringcopy_finish
    m_copy_byte reg=r5, cmd=cbnz, label=.Lstringcopy_continue

.Lstringcopy_finish:
    // r0 is one past the NUL that was just stored.
#if defined(STPCPY)
    sub     r0, r0, #1
#endif
    m_ret   inst=pop

.Lstringcopy_continue:
    pld     [r1, #0]
    // r3 = dst & 7; a nonzero value means dst still has to be aligned.
    ands    r3, r0, #7
    bne     .Lstringcopy_align_dst

.Lstringcopy_check_src_align:
    // At this point dst is aligned to a double word, check if src
    // is also aligned to a double word.
    // r3 = src & 7 is the index used by the branch table in
    // .Lstringcopy_unaligned_copy.
    ands    r3, r1, #7
    bne     .Lstringcopy_unaligned_copy

    .p2align 2
.Lstringcopy_mainloop:
    // Both pointers are double word aligned: move 8 bytes per iteration.
    ldmia   r1!, {r2, r3}

    pld     [r1, #64]

    // (x - 0x01010101) & ~x & 0x80808080 is nonzero exactly when the word
    // x contains a zero byte. The lowest set marker bit (bit 7, 15, 23 or
    // 31) belongs to the first zero byte. The result is left in ip for
    // the .Lstringcopy_zero_in_* handlers.
    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_mainloop

.Lstringcopy_zero_in_first_register:
    // In: r2 = word containing the NUL, ip = zero-byte markers for r2,
    //     r0 = where r2 belongs in dst (not yet stored).
    // lsls #17 moves the byte 0 marker (bit 7) into the result, so NE
    // means byte 0 is NUL, and shifts the byte 1 marker (bit 15) out
    // into the carry flag.
    lsls    lr, ip, #17
    itt     ne
    strbne  r2, [r0]
    m_ret   inst=popne
    // Byte 1 is NUL: store two bytes (stpcpy leaves r0 on the NUL).
    itt     cs
#if defined(STPCPY)
    strhcs  r2, [r0], #1
#else
    strhcs  r2, [r0]
#endif
    m_ret   inst=popcs
    // The markers for bytes 0 and 1 are known to be clear here, so after
    // shifting left by one the result is zero unless the byte 2 marker
    // (bit 23) is set. EQ therefore means the NUL is in byte 3.
    lsls    ip, ip, #1
    itt     eq
#if defined(STPCPY)
    streq   r2, [r0], #3
#else
    streq   r2, [r0]
#endif
    m_ret   inst=popeq
    // Byte 2 is NUL: store bytes 0-1 and then byte 2.
    strh    r2, [r0], #2
    lsr     r3, r2, #16
    strb    r3, [r0]
    m_ret   inst=pop

.Lstringcopy_zero_in_second_register:
    // In: r2 = word without a NUL, r3 = word containing the NUL,
    //     ip = zero-byte markers for r3, r0 = where r2 belongs in dst.
    // The flag tests mirror .Lstringcopy_zero_in_first_register, applied
    // to r3; r2 is always stored in full ahead of the tail of r3.
    lsls    lr, ip, #17
    ittt    ne
    stmiane r0!, {r2}
    strbne  r3, [r0]
    m_ret   inst=popne
    ittt    cs
    strcs   r2, [r0], #4
#if defined(STPCPY)
    strhcs  r3, [r0], #1
#else
    strhcs  r3, [r0]
#endif
    m_ret   inst=popcs
    lsls    ip, ip, #1
    // The IT block is one instruction longer for stpcpy because of the
    // extra addeq that moves r0 onto the NUL (byte 7 of the pair).
#if defined(STPCPY)
    ittt    eq
#else
    itt     eq
#endif
    stmiaeq r0, {r2, r3}
#if defined(STPCPY)
    addeq   r0, r0, #7
#endif
    m_ret   inst=popeq
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r4, r3, #16
    strb    r4, [r0]
    m_ret   inst=pop

.Lstringcopy_align_dst:
    // Align to a double word (64 bits).
    // r3 = 8 - (dst & 7), the number of bytes needed to align dst.
    // lsls #31 leaves bit 0 of that count as the only possible result bit
    // (EQ when it is clear) and shifts bit 1 out into the carry flag.
    rsb     r3, r3, #8
    lsls    ip, r3, #31
    beq     .Lstringcopy_align_to_32

    // Bit 0 of the count is set: copy one byte.
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete

.Lstringcopy_align_to_32:
    // The carry flag still comes from the lsls above; the ldrb / strb /
    // cbz instructions in between do not change the flags.
    bcc     .Lstringcopy_align_to_64

    // Bit 1 of the count is set: copy two bytes.
    ldrb    r4, [r1], #1
    strb    r4, [r0], #1
    cmp     r4, #0
#if defined(STPCPY)
    itt     eq
    subeq   r0, r0, #1
#else
    it      eq
#endif
    m_ret   inst=popeq
    ldrb    r5, [r1], #1
    strb    r5, [r0], #1
    cmp     r5, #0
#if defined(STPCPY)
    itt     eq
    subeq   r0, r0, #1
#else
    it      eq
#endif
    m_ret   inst=popeq

.Lstringcopy_align_to_64:
    // Bit 2 of the count decides whether four more bytes are needed.
    tst     r3, #4
    beq     .Lstringcopy_check_src_align
    // Read one byte at a time since we don't have any idea about the alignment
    // of the source and we don't want to read into a different page.
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstringcopy_complete
    b       .Lstringcopy_check_src_align

.Lstringcopy_complete:
    // r0 is one past the NUL that was just stored.
#if defined(STPCPY)
    sub     r0, r0, #1
#endif
    m_ret   inst=pop

.Lstringcopy_unaligned_copy:
    // Dst is aligned to a double word, while src is at an unknown alignment.
    // There are 7 different versions of the unaligned copy code
    // to prevent overreading the src. The mainloop of every single version
    // will store 64 bits per loop. The difference is how much of src can
    // be read without potentially crossing a page boundary.
    //
    // r3 = src & 7 and is nonzero here, so table entry 0 is never used.
    // Entry n selects .Lstringcopy_unalign(8 - n). Every loop below
    // advances src by 8 per iteration, so src & 7 does not change while
    // a loop is running.
    tbb     [pc, r3]
.Lstringcopy_unaligned_branchtable:
    .byte   0
    .byte   ((.Lstringcopy_unalign7 - .Lstringcopy_unaligned_branchtable)/2)
    .byte   ((.Lstringcopy_unalign6 - .Lstringcopy_unaligned_branchtable)/2)
    .byte   ((.Lstringcopy_unalign5 - .Lstringcopy_unaligned_branchtable)/2)
    .byte   ((.Lstringcopy_unalign4 - .Lstringcopy_unaligned_branchtable)/2)
    .byte   ((.Lstringcopy_unalign3 - .Lstringcopy_unaligned_branchtable)/2)
    .byte   ((.Lstringcopy_unalign2 - .Lstringcopy_unaligned_branchtable)/2)
    .byte   ((.Lstringcopy_unalign1 - .Lstringcopy_unaligned_branchtable)/2)

    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
.Lstringcopy_unalign7:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Bytes 4, 5 and 6 are checked one at a time before the word that
    // also contains byte 7 is loaded.
    ldrb    r3, [r1]
    cbz     r3, .Lstringcopy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, .Lstringcopy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, .Lstringcopy_unalign7_copy7bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Only the top byte of r3 is still unchecked; Z is set when it is
    // NUL. stmia does not change the flags, so the beq below still tests
    // the lsrs result. .Lstringcopy_finish steps r0 back onto the NUL
    // for stpcpy.
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
#if defined(STPCPY)
    beq     .Lstringcopy_finish
#else
    beq     .Lstringcopy_unalign_return
#endif
    b       .Lstringcopy_unalign7

.Lstringcopy_unalign7_copy5bytes:
    stmia   r0!, {r2}
    strb    r3, [r0]
.Lstringcopy_unalign_return:
    m_ret   inst=pop

.Lstringcopy_unalign7_copy6bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign7_copy7bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
.Lstringcopy_unalign6:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Bytes 4 and 5 are checked one at a time before the second word
    // is loaded.
    ldrb    r4, [r1]
    cbz     r4, .Lstringcopy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, .Lstringcopy_unalign_copy6bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r3 were checked above; test byte 2 and then byte 3.
    tst     r3, #0xff0000
    beq     .Lstringcopy_unalign6_copy7bytes
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
#if defined(STPCPY)
    beq     .Lstringcopy_finish
#else
    beq     .Lstringcopy_unalign_return
#endif
    b       .Lstringcopy_unalign6

.Lstringcopy_unalign6_copy7bytes:
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
.Lstringcopy_unalign5:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    // Byte 4 is checked on its own before the second word is loaded.
    ldrb    r4, [r1]
    cbz     r4, .Lstringcopy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign5

.Lstringcopy_unalign_copy5bytes:
    // r2 = first four bytes, r4 = the NUL that is byte 4.
    stmia   r0!, {r2}
    strb    r4, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy6bytes:
    // r2 = first four bytes, r4 = byte 4, r5 = the NUL that is byte 5.
    stmia   r0!, {r2}
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
.Lstringcopy_unalign4:
    // The second word is loaded only after the first is known to hold
    // no NUL.
    ldmia   r1!, {r2}

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    ldmia   r1!, {r3}
    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign4

    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
.Lstringcopy_unalign3:
    // Bytes 0, 1 and 2 are checked one at a time before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstringcopy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, .Lstringcopy_unalign3_copy3bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Only byte 3 of r2 is still unchecked.
    lsrs    lr, r2, #24
    beq     .Lstringcopy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign3

.Lstringcopy_unalign3_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign3_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign3_copy3bytes:
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
.Lstringcopy_unalign2:
    // Bytes 0 and 1 are checked one at a time before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstringcopy_unalign_copy2bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r2 were checked above; test byte 2 and then byte 3.
    tst     r2, #0xff0000
    beq     .Lstringcopy_unalign_copy3bytes
    lsrs    ip, r2, #24
    beq     .Lstringcopy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign2

    .p2align 2
    // Can read 1 byte before possibly crossing a page.
.Lstringcopy_unalign1:
    // Byte 0 is checked on its own before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstringcopy_unalign_copy1byte

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstringcopy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstringcopy_unalign1

.Lstringcopy_unalign_copy1byte:
    // r2 = the NUL byte.
    strb    r2, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy2bytes:
    // r2 = byte 0, r3 = the NUL that is byte 1.
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy3bytes:
    // r2 = word whose byte 2 is NUL: store bytes 0-1 and then byte 2.
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_ret   inst=pop

.Lstringcopy_unalign_copy4bytes:
    // r2 = word whose byte 3 is NUL: store it whole. For stpcpy, r0 is
    // then moved onto that NUL.
    stmia   r0, {r2}
#if defined(STPCPY)
    add     r0, r0, #3
#endif
    m_ret   inst=pop
#if defined(STPCPY)
END(stpcpy)
#else
END(strcpy)
#endif