/* memcpy_base.S revision 5b5d6e7045dece4e112553e9a2516240ea32f812 */
/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 * Copyright (c) 2013-2014, NVIDIA Corporation.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#define CACHE_LINE_SIZE         (64)
#define PREFETCH_DISTANCE       (CACHE_LINE_SIZE*6)

/*
 * MEMCPY_BASE: copy r2 bytes from [r1] to [r0].
 *
 * MEMCPY_BASE, ENTRY_PRIVATE and END are not defined in this file; they
 * have to be supplied by whatever includes it.
 *
 * In:    r0 = destination pointer
 *        r1 = source pointer
 *        r2 = number of bytes to copy
 * Stack: two words are expected on the stack at entry.  The CFI
 *        directives below describe them as the saved r0 at [sp, #0] and
 *        the saved lr at [sp, #4], and every exit path is
 *        "pop {r0, pc}", which reloads r0 from the first word and
 *        returns through the second.
 *        NOTE(review): presumably the including entry point does
 *        "push {r0, lr}" before reaching this code -- confirm there.
 * Clobb: r1, r2, r3, ip, q0, q1, condition flags
 *        (r0 is advanced during the copy and then reloaded by the pop).
 *
 * The routine returns immediately when r2 == 0 or r0 == r1.  It does
 * not test for any other kind of overlap between the two buffers.
 */
ENTRY_PRIVATE(MEMCPY_BASE)
        .cfi_def_cfa_offset 8
        .cfi_rel_offset r0, 0
        .cfi_rel_offset lr, 4

        /* nothing to do for a zero length or when dest == src */
        cmp         r2, #0
        beq         .L_memcpy_done
        cmp         r0, r1
        beq         .L_memcpy_done

        /* preload next cache line */
        pld         [r1, #CACHE_LINE_SIZE*1]

        /* Deal with very small blocks (< 32bytes) asap */
        cmp         r2, #32
        blo         .L_memcpy_lt_32bytes
        /* no need to align if len < 128 bytes */
        cmp         r2, #128
        blo         .L_memcpy_lt_128bytes

        /* large copy, align dest to 64 byte boundary */
        pld         [r1, #CACHE_LINE_SIZE*2]
        /* r3 = (-dest) & 63 = bytes needed to reach the next 64 byte boundary */
        rsb         r3, r0, #0
        ands        r3, r3, #0x3F
        pld         [r1, #CACHE_LINE_SIZE*3]
        beq         .L_memcpy_dispatch
        sub         r2, r2, r3
        /*
         * Each "movs ip, r3, lsl #n" below moves two bits of r3 into the
         * flags: N receives bit (31 - n) and C receives bit (32 - n).
         * The loads and stores in between leave the flags untouched, so
         * one movs serves two copy steps.
         */
        /* copy 1 byte */
        movs        ip, r3, lsl #31         /* N = bit 0, C = bit 1 */
        itt         mi
        ldrbmi      ip, [r1], #1
        strbmi      ip, [r0], #1
        /* copy 2 bytes */
        itt         cs
        ldrhcs      ip, [r1], #2
        strhcs      ip, [r0], #2
        /* copy 4 bytes */
        movs        ip, r3, lsl #29         /* N = bit 2, C = bit 3 */
        itt         mi
        ldrmi       ip, [r1], #4
        strmi       ip, [r0], #4
        /* copy 8 bytes */
        bcc         1f
        vld1.8      {d0}, [r1]!
        vst1.8      {d0}, [r0, :64]!
1:      /* copy 16 bytes */
        movs        ip, r3, lsl #27         /* N = bit 4, C = bit 5 */
        bpl         1f
        vld1.8      {q0}, [r1]!
        vst1.8      {q0}, [r0, :128]!
1:      /* copy 32 bytes */
        bcc         .L_memcpy_dispatch
        vld1.8      {q0, q1}, [r1]!
        vst1.8      {q0, q1}, [r0, :256]!

.L_memcpy_dispatch:
        // pre-decrement by 128 to detect nearly-done condition easily, but
        // also need to check if we have less than 128 bytes left at this
        // point due to alignment code above
        subs        r2, r2, #128
        blo         .L_memcpy_lt_128presub

        // Denver does better if both source and dest are aligned so
        // we'll special-case that even though the code is virtually identical
        tst         r1, #0xF
        bne         .L_memcpy_neon_unalign_src_pld

        // DRAM memcpy should be throttled slightly to get full bandwidth
        //
        // (r2 already has 128 subtracted here; anything above 32768 takes
        // the loop that carries no alignment hint on the source.)
        cmp         r2, #32768
        bhi         .L_memcpy_neon_unalign_src_pld
        .align      4
1:
        /* copy 128 bytes in each loop */
        /*
         * The flags set by this subs are only consumed by the "bhs 1b"
         * at the bottom; pld/vld1/vst1 do not modify them.
         */
        subs        r2, r2, #128

        /* preload a cache line */
        pld         [r1, #PREFETCH_DISTANCE]
        /* copy a cache line */
        vld1.8      {q0, q1}, [r1, :128]!
        vst1.8      {q0, q1}, [r0, :256]!
        vld1.8      {q0, q1}, [r1, :128]!
        vst1.8      {q0, q1}, [r0, :256]!
        /* preload a cache line */
        pld         [r1, #PREFETCH_DISTANCE]
        /* copy a cache line */
        vld1.8      {q0, q1}, [r1, :128]!
        vst1.8      {q0, q1}, [r0, :256]!
        vld1.8      {q0, q1}, [r1, :128]!
        vst1.8      {q0, q1}, [r0, :256]!

        bhs         1b
        /* undo the pre-decrement: r2 = bytes still to copy (0..127) */
        adds        r2, r2, #128
        bne         .L_memcpy_lt_128bytes_align
        pop         {r0, pc}

        .align      4
.L_memcpy_neon_unalign_src_pld:
1:
        /* copy 128 bytes in each loop */
        /* same loop as above, minus the :128 alignment hint on the loads */
        subs        r2, r2, #128

        /* preload a cache line */
        pld         [r1, #PREFETCH_DISTANCE]
        /* copy a cache line */
        vld1.8      {q0, q1}, [r1]!
        vst1.8      {q0, q1}, [r0, :256]!
        vld1.8      {q0, q1}, [r1]!
        vst1.8      {q0, q1}, [r0, :256]!
        /* preload a cache line */
        pld         [r1, #PREFETCH_DISTANCE]
        /* copy a cache line */
        vld1.8      {q0, q1}, [r1]!
        vst1.8      {q0, q1}, [r0, :256]!
        vld1.8      {q0, q1}, [r1]!
        vst1.8      {q0, q1}, [r0, :256]!

        bhs         1b
        /* undo the pre-decrement: r2 = bytes still to copy (0..127) */
        adds        r2, r2, #128
        bne         .L_memcpy_lt_128bytes_align
        pop         {r0, pc}

.L_memcpy_lt_128presub:
        /* fewer than 128 bytes were left at dispatch: undo the subs */
        add         r2, r2, #128
.L_memcpy_lt_128bytes_align:
        /*
         * Tail copy for the paths that aligned dest above: the stores
         * keep their alignment hints.  r2 < 128 here.
         */
        /* copy 64 bytes */
        movs        ip, r2, lsl #26         /* N = bit 5, C = bit 6 */
        bcc         1f
        vld1.8      {q0, q1}, [r1]!
        vst1.8      {q0, q1}, [r0, :256]!
        vld1.8      {q0, q1}, [r1]!
        vst1.8      {q0, q1}, [r0, :256]!
1:      /* copy 32 bytes */
        bpl         1f
        vld1.8      {q0, q1}, [r1]!
        vst1.8      {q0, q1}, [r0, :256]!
1:      /* copy 16 bytes */
        movs        ip, r2, lsl #28         /* N = bit 3, C = bit 4 */
        bcc         1f
        vld1.8      {q0}, [r1]!
        vst1.8      {q0}, [r0, :128]!
1:      /* copy 8 bytes */
        bpl         1f
        vld1.8      {d0}, [r1]!
        vst1.8      {d0}, [r0, :64]!
1:      /* copy 4 bytes */
        tst         r2, #4
        itt         ne
        ldrne       ip, [r1], #4
        strne       ip, [r0], #4
        /* copy 2 bytes */
        movs        ip, r2, lsl #31         /* N = bit 0, C = bit 1 */
        itt         cs
        ldrhcs      ip, [r1], #2
        strhcs      ip, [r0], #2
        /* copy 1 byte */
        itt         mi
        ldrbmi      ip, [r1]
        strbmi      ip, [r0]

        pop         {r0, pc}

.L_memcpy_lt_128bytes:
        /*
         * Copy of 32..127 bytes taken straight from the entry checks:
         * dest was never aligned, so the stores carry no alignment hint.
         */
        /* copy 64 bytes */
        movs        ip, r2, lsl #26         /* N = bit 5, C = bit 6 */
        bcc         1f
        vld1.8      {q0, q1}, [r1]!
        vst1.8      {q0, q1}, [r0]!
        vld1.8      {q0, q1}, [r1]!
        vst1.8      {q0, q1}, [r0]!
1:      /* copy 32 bytes */
        bpl         .L_memcpy_lt_32bytes
        vld1.8      {q0, q1}, [r1]!
        vst1.8      {q0, q1}, [r0]!
.L_memcpy_lt_32bytes:
        /* copy 16 bytes */
        movs        ip, r2, lsl #28         /* N = bit 3, C = bit 4 */
        bcc         1f
        vld1.8      {q0}, [r1]!
        vst1.8      {q0}, [r0]!
1:      /* copy 8 bytes */
        bpl         1f
        vld1.8      {d0}, [r1]!
        vst1.8      {d0}, [r0]!
1:      /* copy 4 bytes */
        tst         r2, #4
        itt         ne
        ldrne       ip, [r1], #4
        strne       ip, [r0], #4
        /* copy 2 bytes */
        movs        ip, r2, lsl #31         /* N = bit 0, C = bit 1 */
        itt         cs
        ldrhcs      ip, [r1], #2
        strhcs      ip, [r0], #2
        /* copy 1 byte */
        itt         mi
        ldrbmi      ip, [r1]
        strbmi      ip, [r0]

.L_memcpy_done:
        pop         {r0, pc}
END(MEMCPY_BASE)