11dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project/* 21dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * Copyright (C) 2008 The Android Open Source Project 31dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * All rights reserved. 41dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 51dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * Redistribution and use in source and binary forms, with or without 61dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * modification, are permitted provided that the following conditions 71dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * are met: 81dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * * Redistributions of source code must retain the above copyright 91dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * notice, this list of conditions and the following disclaimer. 101dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * * Redistributions in binary form must reproduce the above copyright 111dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * notice, this list of conditions and the following disclaimer in 121dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * the documentation and/or other materials provided with the 131dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * distribution. 
141dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 151dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 161dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 171dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 181dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 191dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 201dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 211dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 221dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 231dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 241dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 251dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 261dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * SUCH DAMAGE. 
*/

#include <machine/cpu-features.h>
#include <private/bionic_asm.h>

#if defined(__ARM_NEON__) && !defined(ARCH_ARM_USE_NON_NEON_MEMCPY)

        .text
        .fpu    neon

#ifdef HAVE_32_BYTE_CACHE_LINE
/* a prefetch distance of 2 cache-lines */
#define CACHE_LINE_SIZE     32
/*
 * PREFETCH_DISTANCE is used by the 32-byte-cache-line copy loop below but
 * was never defined in this file. Guarded so that a definition coming from
 * an included header (if any) still takes precedence.
 */
#ifndef PREFETCH_DISTANCE
#define PREFETCH_DISTANCE   (CACHE_LINE_SIZE * 2)
#endif
#else
/* a prefetch distance of 4 cache-lines works best experimentally */
#define CACHE_LINE_SIZE     64
#endif

/*
 * memcpy() for ARM cores with NEON (ARM mode, pre-UAL mnemonics).
 *
 * In:    r0 = destination, r1 = source, r2 = number of bytes to copy
 * Out:   r0 = destination pointer as passed in (pushed on entry and
 *        popped again on every exit path)
 * Uses:  r3, ip (r12), lr, the condition flags and d0-d7 as scratch.
 *        lr is pushed on entry; r4-r8 are pushed/popped by the ARM-only
 *        path at label 11.
 *
 * Build-time switches read by this function:
 *   HAVE_32_BYTE_CACHE_LINE        selects CACHE_LINE_SIZE 32 instead of 64
 *   NEON_UNALIGNED_ACCESS          drops the alignment hints on NEON stores
 *   NEON_MEMCPY_ALIGNMENT_DIVIDER  (only with NEON_UNALIGNED_ACCESS) copies
 *                                  of at least this many bytes still get
 *                                  their destination aligned first
 *
 * Numeric local labels are reused throughout; "Nf"/"Nb" always refer to
 * the nearest label N forward/backward.
 */
ENTRY(memcpy)
        .save       {r0, lr}
        /* start preloading as early as possible */
        pld         [r1, #(CACHE_LINE_SIZE * 0)]
        stmfd       sp!, {r0, lr}
        pld         [r1, #(CACHE_LINE_SIZE * 1)]

/* If Neon supports unaligned access then remove the align code,
 * unless a size limit has been specified.
 */
#ifndef NEON_UNALIGNED_ACCESS
        /* do we have at least 16-bytes to copy (needed for alignment below) */
        cmp         r2, #16
        blo         5f

        /* check if buffers are aligned. If so, run arm-only version:
         * (dst ^ src) & 3 == 0 means both share the same offset within
         * a 32-bit word.
         */
        eor         r3, r0, r1
        ands        r3, r3, #0x3
        beq         11f

        /* align destination to 16 bytes for the write-buffer;
         * r3 = (-dst) & 15 = bytes needed to reach the next 16-byte boundary
         */
        rsb         r3, r0, #0
        ands        r3, r3, #0xF
        beq         2f

        /* copy up to 15-bytes (count in r3) */
        sub         r2, r2, r3
        /* lsl #31: N = bit 0 of r3 (1 byte), C = bit 1 of r3 (2 bytes) */
        movs        ip, r3, lsl #31
        ldrmib      lr, [r1], #1
        strmib      lr, [r0], #1
        ldrcsb      ip, [r1], #1
        ldrcsb      lr, [r1], #1
        strcsb      ip, [r0], #1
        strcsb      lr, [r0], #1
        /* lsl #29: N = bit 2 of r3 (4 bytes), C = bit 3 of r3 (8 bytes).
         * bge skips the 4-byte copy when N is clear (assumes V is clear
         * here; movs does not modify V).
         */
        movs        ip, r3, lsl #29
        bge         1f
        // copies 4 bytes, destination 32-bits aligned
        vld4.8      {d0[0], d1[0], d2[0], d3[0]}, [r1]!
        vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
1:      bcc         2f
        // copies 8 bytes, destination 64-bits aligned
        vld1.8      {d0}, [r1]!
        vst1.8      {d0}, [r0, :64]!
2:
        /* preload immediately the next cache line, which we may need */
        pld         [r1, #(CACHE_LINE_SIZE * 0)]
        pld         [r1, #(CACHE_LINE_SIZE * 1)]

#ifdef HAVE_32_BYTE_CACHE_LINE
        /* make sure we have at least 32 bytes to copy */
        subs        r2, r2, #32
        blo         4f

        /* preload all the cache lines we need.
         * NOTE: the number of pld below depends on PREFETCH_DISTANCE,
         * ideally we would increase the distance in the main loop to
         * avoid the goofy code below. In practice this doesn't seem to make
         * a big difference.
         */
        pld         [r1, #(PREFETCH_DISTANCE)]

1:      /* The main loop copies 32 bytes at a time */
        vld1.8      {d0 - d3}, [r1]!
        pld         [r1, #(PREFETCH_DISTANCE)]
        subs        r2, r2, #32
        vst1.8      {d0 - d3}, [r0, :128]!
        bhs         1b
#else
        /* make sure we have at least 64 bytes to copy */
        subs        r2, r2, #64
        blo         2f

        /* preload all the cache lines we need. */
        pld         [r1, #(CACHE_LINE_SIZE * 2)]
        pld         [r1, #(CACHE_LINE_SIZE * 3)]

1:      /* The main loop copies 64 bytes at a time */
        vld1.8      {d0 - d3}, [r1]!
        vld1.8      {d4 - d7}, [r1]!
        /* HAVE_32_BYTE_CACHE_LINE is never defined in this branch, so a
         * single preload per iteration is all that was ever assembled here.
         */
        pld         [r1, #(CACHE_LINE_SIZE * 3)]
        subs        r2, r2, #64
        vst1.8      {d0 - d3}, [r0, :128]!
        vst1.8      {d4 - d7}, [r0, :128]!
        bhs         1b

2:      /* fix-up the remaining count and make sure we have >= 32 bytes left */
        add         r2, r2, #64
        subs        r2, r2, #32
        blo         4f

3:      /* 32 bytes at a time. These cache lines were already preloaded */
        vld1.8      {d0 - d3}, [r1]!
        subs        r2, r2, #32
        vst1.8      {d0 - d3}, [r0, :128]!
        bhs         3b
#endif
4:      /* less than 32 left */
        add         r2, r2, #32
        tst         r2, #0x10
        beq         5f
        // copies 16 bytes, 128-bits aligned
        vld1.8      {d0, d1}, [r1]!
        vst1.8      {d0, d1}, [r0, :128]!
5:      /* copy up to 15-bytes (count in r2) */
        /* lsl #29: C = bit 3 of r2 (8 bytes), N = bit 2 of r2 (4 bytes) */
        movs        ip, r2, lsl #29
        bcc         1f
        vld1.8      {d0}, [r1]!
        vst1.8      {d0}, [r0]!
1:      bge         2f
        vld4.8      {d0[0], d1[0], d2[0], d3[0]}, [r1]!
        vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [r0]!
        /* lsl #31: N = bit 0 of r2 (1 byte), C = bit 1 of r2 (2 bytes) */
2:      movs        ip, r2, lsl #31
        ldrmib      r3, [r1], #1
        ldrcsb      ip, [r1], #1
        ldrcsb      lr, [r1], #1
        strmib      r3, [r0], #1
        strcsb      ip, [r0], #1
        strcsb      lr, [r0], #1

        /* restore the original destination as the return value */
        ldmfd       sp!, {r0, lr}
        bx          lr

#else   /* NEON_UNALIGNED_ACCESS */

        // Make sure the length is at least 16 bytes, needed for alignment code.
        cmp         r2, #16
        blo         5f

#ifdef NEON_MEMCPY_ALIGNMENT_DIVIDER
        /* Check the upper size limit for Neon unaligned memory access in memcpy */
#if NEON_MEMCPY_ALIGNMENT_DIVIDER >= 16
        /* copies shorter than the divider skip the alignment code entirely */
        cmp         r2, #NEON_MEMCPY_ALIGNMENT_DIVIDER
        blo         3f
#endif
        /* check if buffers are aligned. If so, run arm-only version */
        eor         r3, r0, r1
        ands        r3, r3, #0x3
        beq         11f

        /* align destination to 16 bytes for the write-buffer */
        rsb         r3, r0, #0
        ands        r3, r3, #0xF
        beq         3f

        /* copy up to 15-bytes (count in r3) */
        sub         r2, r2, r3
        /* lsl #31: N = bit 0 of r3 (1 byte), C = bit 1 of r3 (2 bytes) */
        movs        ip, r3, lsl #31
        ldrmib      lr, [r1], #1
        strmib      lr, [r0], #1
        ldrcsb      ip, [r1], #1
        ldrcsb      lr, [r1], #1
        strcsb      ip, [r0], #1
        strcsb      lr, [r0], #1
        /* lsl #29: N = bit 2 of r3 (4 bytes), C = bit 3 of r3 (8 bytes) */
        movs        ip, r3, lsl #29
        bge         1f
        // copies 4 bytes, destination 32-bits aligned
        vld1.32     {d0[0]}, [r1]!
        vst1.32     {d0[0]}, [r0, :32]!
1:      bcc         2f
        // copies 8 bytes, destination 64-bits aligned
        vld1.8      {d0}, [r1]!
        vst1.8      {d0}, [r0, :64]!
2:
        /* preload immediately the next cache line, which we may need */
        pld         [r1, #(CACHE_LINE_SIZE * 0)]
        pld         [r1, #(CACHE_LINE_SIZE * 1)]
3:
#endif
        /* make sure we have at least 64 bytes to copy */
        subs        r2, r2, #64
        blo         2f

        /* preload all the cache lines we need */
        pld         [r1, #(CACHE_LINE_SIZE * 2)]
        pld         [r1, #(CACHE_LINE_SIZE * 3)]

1:      /* The main loop copies 64 bytes at a time */
        vld1.8      {d0 - d3}, [r1]!
        vld1.8      {d4 - d7}, [r1]!
#ifdef HAVE_32_BYTE_CACHE_LINE
        /* 64 bytes per iteration span two 32-byte lines: preload both */
        pld         [r1, #(CACHE_LINE_SIZE * 2)]
        pld         [r1, #(CACHE_LINE_SIZE * 3)]
#else
        pld         [r1, #(CACHE_LINE_SIZE * 3)]
#endif
        subs        r2, r2, #64
        vst1.8      {d0 - d3}, [r0]!
        vst1.8      {d4 - d7}, [r0]!
        bhs         1b

2:      /* fix-up the remaining count and make sure we have >= 32 bytes left */
        add         r2, r2, #64
        subs        r2, r2, #32
        blo         4f

3:      /* 32 bytes at a time. These cache lines were already preloaded */
        vld1.8      {d0 - d3}, [r1]!
        subs        r2, r2, #32
        vst1.8      {d0 - d3}, [r0]!
        bhs         3b

4:      /* less than 32 left */
        add         r2, r2, #32
        tst         r2, #0x10
        beq         5f
        // copies 16 bytes (no alignment hint on this path)
        vld1.8      {d0, d1}, [r1]!
        vst1.8      {d0, d1}, [r0]!
5:      /* copy up to 15-bytes (count in r2) */
        /* lsl #29: C = bit 3 of r2 (8 bytes), N = bit 2 of r2 (4 bytes) */
        movs        ip, r2, lsl #29
        bcc         1f
        vld1.8      {d0}, [r1]!
        vst1.8      {d0}, [r0]!
1:      bge         2f
        vld1.32     {d0[0]}, [r1]!
        vst1.32     {d0[0]}, [r0]!
        /* lsl #31: N = bit 0 of r2 (1 byte), C = bit 1 of r2 (2 bytes) */
2:      movs        ip, r2, lsl #31
        ldrmib      r3, [r1], #1
        ldrcsb      ip, [r1], #1
        ldrcsb      lr, [r1], #1
        strmib      r3, [r0], #1
        strcsb      ip, [r0], #1
        strcsb      lr, [r0], #1

        /* restore the original destination as the return value */
        ldmfd       sp!, {r0, lr}
        bx          lr
#endif  /* NEON_UNALIGNED_ACCESS */
11:
        /* Simple arm-only copy loop to handle aligned copy operations.
         * Reached only with r2 >= 16 and (dst ^ src) & 3 == 0.
         * r4-r8 are pushed here and popped at label 6.
         */
        stmfd       sp!, {r4, r5, r6, r7, r8}
        pld         [r1, #(CACHE_LINE_SIZE * 2)]

        /* Check alignment: r3 = (-src) & 3 = bytes up to the next word */
        rsb         r3, r1, #0
        ands        r3, #3
        beq         2f

        /* align source to 32 bits. We need to insert 2 instructions between
         * a ldr[b|h] and str[b|h] because byte and half-word instructions
         * stall 2 cycles.
         */
        movs        r12, r3, lsl #31
        sub         r2, r2, r3          /* r3 <= 3 and r2 >= 16, so no underflow */
        ldrmib      r3, [r1], #1
        ldrcsb      r4, [r1], #1
        ldrcsb      r5, [r1], #1
        strmib      r3, [r0], #1
        strcsb      r4, [r0], #1
        strcsb      r5, [r0], #1
2:
        /* r2 is a byte count: use the unsigned conditions (blo/bhs), the
         * same ones the NEON loops above use after subs.
         */
        subs        r2, #32
        blo         5f
        pld         [r1, #(CACHE_LINE_SIZE * 3)]
3:      /* Main copy loop, copying 32 bytes at a time */
        pld         [r1, #(CACHE_LINE_SIZE * 4)]
        ldmia       r1!, {r3, r4, r5, r6, r7, r8, r12, lr}
        subs        r2, r2, #32
        stmia       r0!, {r3, r4, r5, r6, r7, r8, r12, lr}
        bhs         3b
5:      /* Handle any remaining bytes (r2 + 32 = 0..31 left) */
        adds        r2, #32
        beq         6f

        /* lsl #28: C = bit 4 of r2 (16 bytes), N = bit 3 of r2 (8 bytes) */
        movs        r12, r2, lsl #28
        ldmcsia     r1!, {r3, r4, r5, r6}   /* 16 bytes */
        ldmmiia     r1!, {r7, r8}           /*  8 bytes */
        stmcsia     r0!, {r3, r4, r5, r6}
        stmmiia     r0!, {r7, r8}
        /* lsl #30: C = bit 2 of r2 (4 bytes), N = bit 1 of r2 (2 bytes) */
        movs        r12, r2, lsl #30
        ldrcs       r3, [r1], #4            /*  4 bytes */
        ldrmih      r4, [r1], #2            /*  2 bytes */
        strcs       r3, [r0], #4
        strmih      r4, [r0], #2
        tst         r2, #0x1
        ldrneb      r3, [r1]                /*  last byte */
        strneb      r3, [r0]
6:
        ldmfd       sp!, {r4, r5, r6, r7, r8}
        /* pop the saved destination into r0 and return via the saved lr */
        ldmfd       sp!, {r0, pc}
END(memcpy)


#else   /* !__ARM_NEON__ || ARCH_ARM_USE_NON_NEON_MEMCPY */


        /*
         * Optimized memcpy() for ARM.
3371dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 3381dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * note that memcpy() always returns the destination pointer, 3391dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * so we have to preserve R0. 3401dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 341ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 342420878c6908cf9c2862888477ec3f424a06cf172Kenny RootENTRY(memcpy) 343ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian /* The stack must always be 64-bits aligned to be compliant with the 3441dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * ARM ABI. Since we have to save R0, we might as well save R4 3451dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * which we can use for better pipelining of the reads below 3461dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 3471dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project .save {r0, r4, lr} 3481dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmfd sp!, {r0, r4, lr} 3491dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Making room for r5-r11 which will be spilled later */ 3501dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project .pad #28 3511dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub sp, sp, #28 3521dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3531dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // preload the destination because we'll align it to a cache line 3541dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // with small writes. Also start the source "pump". 
355c54ca40aef48009e7b0e5b2b3069aad62ffd3453Elliott Hughes pld [r0, #0] 356c54ca40aef48009e7b0e5b2b3069aad62ffd3453Elliott Hughes pld [r1, #0] 357c54ca40aef48009e7b0e5b2b3069aad62ffd3453Elliott Hughes pld [r1, #32] 3581dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3591dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* it simplifies things to take care of len<4 early */ 3601dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 3611dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo copy_last_3_and_return 3621dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3631dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* compute the offset to align the source 3641dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * offset = (4-(src&3))&3 = -src & 3 3651dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 3661dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project rsb r3, r1, #0 3671dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ands r3, r3, #3 3681dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq src_aligned 3691dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3701dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* align source to 32 bits. We need to insert 2 instructions between 3711dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * a ldr[b|h] and str[b|h] because byte and half-word instructions 3721dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * stall 2 cycles. 
3731dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 3741dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movs r12, r3, lsl #31 3751dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */ 3761dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrmib r3, [r1], #1 3771dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrcsb r4, [r1], #1 3781dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrcsb r12,[r1], #1 3791dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strmib r3, [r0], #1 3801dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r4, [r0], #1 3811dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r12,[r0], #1 382ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 3831dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectsrc_aligned: 3841dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 385ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian /* see if src and dst are aligned together (congruent) */ 3861dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project eor r12, r0, r1 3871dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project tst r12, #3 3881dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bne non_congruent 3891dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3901dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Use post-incriment mode for stm to spill r5-r11 to reserved stack 3911dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * frame. Don't update sp. 
3921dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 3931dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmea sp, {r5-r11} 3941dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3951dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* align the destination to a cache-line */ 3961dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project rsb r3, r0, #0 3971dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ands r3, r3, #0x1C 3981dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq congruent_aligned32 3991dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r3, r2 4001dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project andhi r3, r2, #0x1C 4011dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4021dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* conditionnaly copies 0 to 7 words (length in r3) */ 403ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian movs r12, r3, lsl #28 4041dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmcsia r1!, {r4, r5, r6, r7} /* 16 bytes */ 4051dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmmiia r1!, {r8, r9} /* 8 bytes */ 4061dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmcsia r0!, {r4, r5, r6, r7} 4071dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmmiia r0!, {r8, r9} 4081dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project tst r3, #0x4 4091dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrne r10,[r1], #4 /* 4 bytes */ 4101dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strne r10,[r0], #4 4111dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub r2, r2, r3 4121dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source 
Project 4131dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectcongruent_aligned32: 4141dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* 4151dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * here source is aligned to 32 bytes. 4161dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 4171dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4181dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectcached_aligned32: 4191dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project subs r2, r2, #32 4201dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo less_than_32_left 4211dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4221dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* 4231dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * We preload a cache-line up to 64 bytes ahead. On the 926, this will 424ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian * stall only until the requested world is fetched, but the linefill 4251dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * continues in the the background. 4261dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * While the linefill is going, we write our previous cache-line 4271dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * into the write-buffer (which should have some free space). 
4281dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * When the linefill is done, the writebuffer will 4291dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * start dumping its content into memory 4301dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 4311dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * While all this is going, we then load a full cache line into 4321dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 8 registers, this cache line should be in the cache by now 4331dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * (or partly in the cache). 4341dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 4351dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * This code should work well regardless of the source/dest alignment. 4361dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 4371dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 4381dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4391dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // Align the preload register to a cache-line because the cpu does 4401dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // "critical word first" (the first word requested is loaded first). 
4411dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bic r12, r1, #0x1F 4421dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project add r12, r12, #64 4431dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4441dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: ldmia r1!, { r4-r11 } 445c54ca40aef48009e7b0e5b2b3069aad62ffd3453Elliott Hughes pld [r12, #64] 4461dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project subs r2, r2, #32 4471dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4481dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // NOTE: if r12 is more than 64 ahead of r1, the following ldrhi 4491dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // for ARM9 preload will not be safely guarded by the preceding subs. 450ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian // When it is safely guarded the only possibility to have SIGSEGV here 4511dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // is because the caller overstates the length. 
4521dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrhi r3, [r12], #32 /* cheap ARM9 preload */ 4531dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmia r0!, { r4-r11 } 4541dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 455ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 4561dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project add r2, r2, #32 4571dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4581dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4591dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4601dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4611dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectless_than_32_left: 462ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian /* 4631dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * less than 32 bytes left at this point (length in r2) 4641dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 4651dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4661dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* skip all this if there is nothing to do, which should 4671dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * be a common case (if not executed the code below takes 4681dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * about 16 cycles) 4691dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 4701dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project tst r2, #0x1F 4711dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq 1f 4721dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4731dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* conditionnaly copies 0 to 31 bytes */ 
474ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian movs r12, r2, lsl #28 4751dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmcsia r1!, {r4, r5, r6, r7} /* 16 bytes */ 4761dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmmiia r1!, {r8, r9} /* 8 bytes */ 4771dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmcsia r0!, {r4, r5, r6, r7} 4781dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmmiia r0!, {r8, r9} 4791dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movs r12, r2, lsl #30 4801dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrcs r3, [r1], #4 /* 4 bytes */ 4811dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrmih r4, [r1], #2 /* 2 bytes */ 482ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian strcs r3, [r0], #4 4831dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strmih r4, [r0], #2 4841dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project tst r2, #0x1 4851dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrneb r3, [r1] /* last byte */ 4861dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strneb r3, [r0] 4871dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4881dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* we're done! 
restore everything and return */ 4891dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: ldmfd sp!, {r5-r11} 4901dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmfd sp!, {r0, r4, lr} 4911dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bx lr 4921dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4931dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /********************************************************************/ 4941dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4951dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectnon_congruent: 4961dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* 4971dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * here source is aligned to 4 bytes 4981dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * but destination is not. 4991dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 500ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian * in the code below r2 is the number of bytes read 5011dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * (the number of bytes written is always smaller, because we have 5021dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * partial words in the shift queue) 5031dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 5041dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 5051dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo copy_last_3_and_return 506ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 5071dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Use post-incriment mode for stm to spill r5-r11 to reserved stack 5081dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * frame. Don't update sp. 
5091dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 5101dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmea sp, {r5-r11} 511ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 5121dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* compute shifts needed to align src to dest */ 5131dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project rsb r5, r0, #0 5141dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project and r5, r5, #3 /* r5 = # bytes in partial words */ 515ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian mov r12, r5, lsl #3 /* r12 = right */ 5161dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project rsb lr, r12, #32 /* lr = left */ 517ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 5181dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* read the first word */ 5191dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldr r3, [r1], #4 5201dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub r2, r2, #4 521ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 5221dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* write a partial word (0 to 3 bytes), such that destination 5231dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * becomes aligned to 32 bits (r5 = nb of words to copy for alignment) 5241dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 5251dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movs r5, r5, lsl #31 5261dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strmib r3, [r0], #1 527ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian movmi r3, r3, lsr #8 5281dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r3, [r0], #1 5291dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movcs r3, r3, lsr #8 
5301dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r3, [r0], #1 5311dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movcs r3, r3, lsr #8 5321dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 5331dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 5341dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo partial_word_tail 535ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 5361dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Align destination to 32 bytes (cache line boundary) */ 5371dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: tst r0, #0x1c 5381dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq 2f 5391dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldr r5, [r1], #4 5401dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub r2, r2, #4 5411dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r4, r3, r5, lsl lr 5421dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r3, r5, lsr r12 5431dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project str r4, [r0], #4 5441dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 5451dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 5461dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo partial_word_tail 5471dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 5481dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* copy 32 bytes at a time */ 5491dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project2: subs r2, r2, #32 5501dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo less_than_thirtytwo 5511dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 
5521dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Use immediate mode for the shifts, because there is an extra cycle 5531dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * for register shifts, which could account for up to 50% of 5541dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * performance hit. 5551dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 5561dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 5571dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r12, #24 5581dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq loop24 5591dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r12, #8 5601dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq loop8 5611dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 5621dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectloop16: 5631dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldr r12, [r1], #4 5641dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: mov r4, r12 5651dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} 566c54ca40aef48009e7b0e5b2b3069aad62ffd3453Elliott Hughes pld [r1, #64] 5671dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project subs r2, r2, #32 5681dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrhs r12, [r1], #4 5691dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r3, r3, r4, lsl #16 5701dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r4, r4, lsr #16 5711dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r4, r4, r5, lsl #16 5721dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r5, r5, lsr #16 
5731dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r5, r5, r6, lsl #16 5741dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r6, r6, lsr #16 5751dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r6, r6, r7, lsl #16 5761dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r7, r7, lsr #16 5771dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r7, r7, r8, lsl #16 5781dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r8, r8, lsr #16 5791dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r8, r8, r9, lsl #16 5801dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r9, r9, lsr #16 5811dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r9, r9, r10, lsl #16 5821dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r10, r10, lsr #16 5831dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r10, r10, r11, lsl #16 5841dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} 5851dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r3, r11, lsr #16 5861dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 5871dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project b less_than_thirtytwo 5881dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 5891dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectloop8: 5901dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldr r12, [r1], #4 5911dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: mov r4, r12 5921dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} 593c54ca40aef48009e7b0e5b2b3069aad62ffd3453Elliott Hughes pld [r1, #64] 
5941dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project subs r2, r2, #32 5951dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrhs r12, [r1], #4 5961dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r3, r3, r4, lsl #24 5971dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r4, r4, lsr #8 5981dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r4, r4, r5, lsl #24 5991dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r5, r5, lsr #8 6001dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r5, r5, r6, lsl #24 6011dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r6, r6, lsr #8 6021dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r6, r6, r7, lsl #24 6031dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r7, r7, lsr #8 6041dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r7, r7, r8, lsl #24 6051dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r8, r8, lsr #8 6061dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r8, r8, r9, lsl #24 6071dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r9, r9, lsr #8 6081dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r9, r9, r10, lsl #24 6091dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r10, r10, lsr #8 6101dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r10, r10, r11, lsl #24 6111dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} 6121dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r3, r11, lsr #8 6131dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 6141dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open 
Source Project b less_than_thirtytwo 6151dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 6161dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectloop24: 6171dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldr r12, [r1], #4 6181dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: mov r4, r12 6191dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} 620c54ca40aef48009e7b0e5b2b3069aad62ffd3453Elliott Hughes pld [r1, #64] 6211dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project subs r2, r2, #32 6221dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrhs r12, [r1], #4 6231dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r3, r3, r4, lsl #8 6241dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r4, r4, lsr #24 6251dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r4, r4, r5, lsl #8 6261dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r5, r5, lsr #24 6271dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r5, r5, r6, lsl #8 6281dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r6, r6, lsr #24 6291dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r6, r6, r7, lsl #8 6301dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r7, r7, lsr #24 6311dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r7, r7, r8, lsl #8 6321dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r8, r8, lsr #24 6331dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r8, r8, r9, lsl #8 6341dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r9, r9, lsr #24 6351dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r9, r9, r10, 
lsl #8 6361dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r10, r10, lsr #24 6371dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r10, r10, r11, lsl #8 6381dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} 6391dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r3, r11, lsr #24 6401dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 6411dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 6421dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 6431dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectless_than_thirtytwo: 6441dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* copy the last 0 to 31 bytes of the source */ 6451dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project rsb r12, lr, #32 /* we corrupted r12, recompute it */ 6461dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project add r2, r2, #32 6471dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 6481dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo partial_word_tail 6491dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 6501dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: ldr r5, [r1], #4 6511dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub r2, r2, #4 6521dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r4, r3, r5, lsl lr 6531dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r3, r5, lsr r12 6541dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project str r4, [r0], #4 6551dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 6561dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 
6571dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 6581dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectpartial_word_tail: 6591dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* we have a partial word in the input buffer */ 6601dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movs r5, lr, lsl #(31-3) 6611dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strmib r3, [r0], #1 6621dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movmi r3, r3, lsr #8 6631dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r3, [r0], #1 6641dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movcs r3, r3, lsr #8 6651dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r3, [r0], #1 666ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 6671dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Refill spilled registers from the stack. Don't update sp. 
*/ 6681dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmfd sp, {r5-r11} 6691dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 6701dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectcopy_last_3_and_return: 6711dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */ 6721dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrmib r2, [r1], #1 6731dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrcsb r3, [r1], #1 6741dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrcsb r12,[r1] 6751dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strmib r2, [r0], #1 6761dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r3, [r0], #1 6771dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r12,[r0] 6781dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 6791dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* we're done! restore sp and spilled registers and return */ 6801dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project add sp, sp, #28 6811dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmfd sp!, {r0, r4, lr} 6821dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bx lr 683420878c6908cf9c2862888477ec3f424a06cf172Kenny RootEND(memcpy) 6841dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 685ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 686ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian#endif /* __ARM_ARCH__ < 7 */ 687