memcpy.S revision 08e72d0161e39e99ff1003bf1ce894f37d7b7eb4
/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
*/

#include <machine/cpu-features.h>
#include <machine/asm.h>

#if defined(__ARM_NEON__) && !defined(ARCH_ARM_USE_NON_NEON_MEMCPY)

        .text
        .fpu    neon

/* a prefetch distance of 4 cache-lines works best experimentally */
#define CACHE_LINE_SIZE     64
#define PREFETCH_DISTANCE   (CACHE_LINE_SIZE*4)

/*
 * void *memcpy(void *dst, const void *src, size_t n)   -- NEON variant
 *
 * In:      r0 = dst, r1 = src, r2 = n (byte count)
 * Out:     r0 = dst (the entry value is pushed on entry and popped on exit)
 * Scratch: r3, ip, lr (lr is saved/restored around its use as a temporary),
 *          d0-d7, condition flags
 * Stack:   8 bytes ({r0, lr})
 *
 * Strategy: copy 0-15 leading bytes so that the destination becomes
 * 16-byte aligned, then copy 64 bytes per iteration with prefetching,
 * then 32 bytes per iteration, then an optional 16-byte chunk, and
 * finally the 0-15 trailing bytes.
 */
ENTRY(memcpy)
        .save       {r0, lr}
        stmfd       sp!, {r0, lr}

        /* start preloading as early as possible */
        pld         [r1, #(CACHE_LINE_SIZE*0)]
        pld         [r1, #(CACHE_LINE_SIZE*1)]

        /* do we have at least 16-bytes to copy (needed for alignment below) */
        cmp         r2, #16
        blo         5f

        /*
         * align destination to 16 bytes: the bulk stores below use the
         * :128 alignment qualifier on r0.  r3 = (-dst) & 15 is the number
         * of bytes needed to reach the next 16-byte boundary.
         */
        rsb         r3, r0, #0
        ands        r3, r3, #0xF
        beq         0f

        /* copy up to 15-bytes (count in r3) */
        sub         r2, r2, r3
        /* lsl #31: N = bit 0 of r3 (1 byte), C = bit 1 of r3 (2 bytes) */
        movs        ip, r3, lsl #31
        ldrmib      lr, [r1], #1
        strmib      lr, [r0], #1
        ldrcsb      ip, [r1], #1
        ldrcsb      lr, [r1], #1
        strcsb      ip, [r0], #1
        strcsb      lr, [r0], #1
        /* lsl #29: N = bit 2 of r3 (4 bytes), C = bit 3 of r3 (8 bytes) */
        movs        ip, r3, lsl #29
        /*
         * NOTE(review): bge tests N == V, and V is not written by movs; it
         * is still whatever "cmp r2, #16" left behind.  This acts as a test
         * of bit 2 only while V is clear -- confirm for lengths >= 2GB.
         */
        bge         1f
        /* copies 4 bytes, destination 32-bits aligned */
        vld4.8      {d0[0], d1[0], d2[0], d3[0]}, [r1]!
        vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
1:      bcc         2f
        /* copies 8 bytes, destination 64-bits aligned */
        vld1.8      {d0}, [r1]!
        vst1.8      {d0}, [r0, :64]!
2:

0:      /* preload immediately the next cache line, which we may need */
        pld         [r1, #(CACHE_LINE_SIZE*0)]
        pld         [r1, #(CACHE_LINE_SIZE*1)]

        /*
         * make sure we have at least 64 bytes to copy.
         * From here until label 2 below, r2 holds (bytes remaining - 64).
         */
        subs        r2, r2, #64
        blo         2f

        /* preload all the cache lines we need.
         * NOTE: the number of pld below depends on PREFETCH_DISTANCE,
         * ideally we would increase the distance in the main loop to
         * avoid the goofy code below. In practice this doesn't seem to make
         * a big difference.
         */
        pld         [r1, #(CACHE_LINE_SIZE*2)]
        pld         [r1, #(CACHE_LINE_SIZE*3)]
        pld         [r1, #(PREFETCH_DISTANCE)]

1:      /* The main loop copies 64 bytes at a time */
        vld1.8      {d0  - d3},   [r1]!
        vld1.8      {d4  - d7},   [r1]!
        pld         [r1, #(PREFETCH_DISTANCE)]
        subs        r2, r2, #64
        vst1.8      {d0  - d3},   [r0, :128]!
        vst1.8      {d4  - d7},   [r0, :128]!
        bhs         1b

2:      /* fix-up the remaining count and make sure we have >= 32 bytes left */
        add         r2, r2, #64
        subs        r2, r2, #32
        blo         4f

3:      /* 32 bytes at a time. These cache lines were already preloaded */
        vld1.8      {d0 - d3},  [r1]!
        subs        r2, r2, #32
        vst1.8      {d0 - d3},  [r0, :128]!
        bhs         3b

4:      /* less than 32 left: undo the bias, r2 = bytes remaining */
        add         r2, r2, #32
        tst         r2, #0x10
        beq         5f
        /* copies 16 bytes, 128-bits aligned */
        vld1.8      {d0, d1}, [r1]!
        vst1.8      {d0, d1}, [r0, :128]!

5:      /* copy up to 15-bytes (count in r2) */
        /* lsl #29: C = bit 3 of r2 (8 bytes), N = bit 2 of r2 (4 bytes) */
        movs        ip, r2, lsl #29
        bcc         1f
        vld1.8      {d0}, [r1]!
        vst1.8      {d0}, [r0]!
        /* NOTE(review): bge depends on V being clear, as above -- confirm */
1:      bge         2f
        vld4.8      {d0[0], d1[0], d2[0], d3[0]}, [r1]!
        vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [r0]!
        /* lsl #31: N = bit 0 of r2 (1 byte), C = bit 1 of r2 (2 bytes) */
2:      movs        ip, r2, lsl #31
        ldrmib      r3, [r1], #1
        ldrcsb      ip, [r1], #1
        ldrcsb      lr, [r1], #1
        strmib      r3, [r0], #1
        strcsb      ip, [r0], #1
        strcsb      lr, [r0], #1

        /* restore the original destination pointer (return value) and lr */
        ldmfd       sp!, {r0, lr}
        bx          lr
END(memcpy)


#else   /* !__ARM_NEON__ || ARCH_ARM_USE_NON_NEON_MEMCPY */

1481dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* 1491dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * Optimized memcpy() for ARM. 1501dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 1511dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * note that memcpy() always returns the destination pointer, 1521dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * so we have to preserve R0. 1531dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 154ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 155420878c6908cf9c2862888477ec3f424a06cf172Kenny RootENTRY(memcpy) 156ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian /* The stack must always be 64-bits aligned to be compliant with the 1571dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * ARM ABI. Since we have to save R0, we might as well save R4 1581dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * which we can use for better pipelining of the reads below 1591dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 1601dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project .save {r0, r4, lr} 1611dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmfd sp!, {r0, r4, lr} 1621dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Making room for r5-r11 which will be spilled later */ 1631dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project .pad #28 1641dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub sp, sp, #28 1651dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 1661dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // preload the destination because we'll align it to a cache line 1671dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // with small writes. 
Also start the source "pump". 1681dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project PLD (r0, #0) 1691dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project PLD (r1, #0) 1701dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project PLD (r1, #32) 1711dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 1721dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* it simplifies things to take care of len<4 early */ 1731dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 1741dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo copy_last_3_and_return 1751dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 1761dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* compute the offset to align the source 1771dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * offset = (4-(src&3))&3 = -src & 3 1781dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 1791dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project rsb r3, r1, #0 1801dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ands r3, r3, #3 1811dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq src_aligned 1821dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 1831dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* align source to 32 bits. We need to insert 2 instructions between 1841dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * a ldr[b|h] and str[b|h] because byte and half-word instructions 1851dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * stall 2 cycles. 
1861dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 1871dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movs r12, r3, lsl #31 1881dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */ 1891dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrmib r3, [r1], #1 1901dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrcsb r4, [r1], #1 1911dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrcsb r12,[r1], #1 1921dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strmib r3, [r0], #1 1931dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r4, [r0], #1 1941dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r12,[r0], #1 195ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 1961dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectsrc_aligned: 1971dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 198ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian /* see if src and dst are aligned together (congruent) */ 1991dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project eor r12, r0, r1 2001dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project tst r12, #3 2011dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bne non_congruent 2021dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2031dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Use post-incriment mode for stm to spill r5-r11 to reserved stack 2041dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * frame. Don't update sp. 
2051dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 2061dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmea sp, {r5-r11} 2071dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2081dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* align the destination to a cache-line */ 2091dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project rsb r3, r0, #0 2101dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ands r3, r3, #0x1C 2111dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq congruent_aligned32 2121dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r3, r2 2131dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project andhi r3, r2, #0x1C 2141dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2151dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* conditionnaly copies 0 to 7 words (length in r3) */ 216ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian movs r12, r3, lsl #28 2171dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmcsia r1!, {r4, r5, r6, r7} /* 16 bytes */ 2181dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmmiia r1!, {r8, r9} /* 8 bytes */ 2191dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmcsia r0!, {r4, r5, r6, r7} 2201dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmmiia r0!, {r8, r9} 2211dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project tst r3, #0x4 2221dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrne r10,[r1], #4 /* 4 bytes */ 2231dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strne r10,[r0], #4 2241dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub r2, r2, r3 2251dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source 
Project 2261dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectcongruent_aligned32: 2271dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* 2281dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * here source is aligned to 32 bytes. 2291dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 2301dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2311dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectcached_aligned32: 2321dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project subs r2, r2, #32 2331dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo less_than_32_left 2341dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2351dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* 2361dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * We preload a cache-line up to 64 bytes ahead. On the 926, this will 237ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian * stall only until the requested world is fetched, but the linefill 2381dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * continues in the the background. 2391dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * While the linefill is going, we write our previous cache-line 2401dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * into the write-buffer (which should have some free space). 
2411dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * When the linefill is done, the writebuffer will 2421dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * start dumping its content into memory 2431dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 2441dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * While all this is going, we then load a full cache line into 2451dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 8 registers, this cache line should be in the cache by now 2461dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * (or partly in the cache). 2471dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 2481dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * This code should work well regardless of the source/dest alignment. 2491dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 2501dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 2511dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2521dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // Align the preload register to a cache-line because the cpu does 2531dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // "critical word first" (the first word requested is loaded first). 
2541dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bic r12, r1, #0x1F 2551dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project add r12, r12, #64 2561dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2571dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: ldmia r1!, { r4-r11 } 2581dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project PLD (r12, #64) 2591dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project subs r2, r2, #32 2601dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2611dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // NOTE: if r12 is more than 64 ahead of r1, the following ldrhi 2621dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // for ARM9 preload will not be safely guarded by the preceding subs. 263ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian // When it is safely guarded the only possibility to have SIGSEGV here 2641dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // is because the caller overstates the length. 
2651dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrhi r3, [r12], #32 /* cheap ARM9 preload */ 2661dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmia r0!, { r4-r11 } 2671dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 268ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 2691dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project add r2, r2, #32 2701dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2711dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2721dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2731dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2741dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectless_than_32_left: 275ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian /* 2761dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * less than 32 bytes left at this point (length in r2) 2771dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 2781dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2791dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* skip all this if there is nothing to do, which should 2801dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * be a common case (if not executed the code below takes 2811dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * about 16 cycles) 2821dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 2831dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project tst r2, #0x1F 2841dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq 1f 2851dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2861dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* conditionnaly copies 0 to 31 bytes */ 
287ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian movs r12, r2, lsl #28 2881dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmcsia r1!, {r4, r5, r6, r7} /* 16 bytes */ 2891dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmmiia r1!, {r8, r9} /* 8 bytes */ 2901dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmcsia r0!, {r4, r5, r6, r7} 2911dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmmiia r0!, {r8, r9} 2921dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movs r12, r2, lsl #30 2931dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrcs r3, [r1], #4 /* 4 bytes */ 2941dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrmih r4, [r1], #2 /* 2 bytes */ 295ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian strcs r3, [r0], #4 2961dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strmih r4, [r0], #2 2971dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project tst r2, #0x1 2981dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrneb r3, [r1] /* last byte */ 2991dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strneb r3, [r0] 3001dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3011dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* we're done! 
restore everything and return */ 3021dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: ldmfd sp!, {r5-r11} 3031dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmfd sp!, {r0, r4, lr} 3041dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bx lr 3051dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3061dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /********************************************************************/ 3071dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3081dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectnon_congruent: 3091dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* 3101dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * here source is aligned to 4 bytes 3111dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * but destination is not. 3121dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 313ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian * in the code below r2 is the number of bytes read 3141dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * (the number of bytes written is always smaller, because we have 3151dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * partial words in the shift queue) 3161dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 3171dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 3181dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo copy_last_3_and_return 319ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 3201dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Use post-incriment mode for stm to spill r5-r11 to reserved stack 3211dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * frame. Don't update sp. 
3221dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 3231dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmea sp, {r5-r11} 324ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 3251dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* compute shifts needed to align src to dest */ 3261dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project rsb r5, r0, #0 3271dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project and r5, r5, #3 /* r5 = # bytes in partial words */ 328ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian mov r12, r5, lsl #3 /* r12 = right */ 3291dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project rsb lr, r12, #32 /* lr = left */ 330ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 3311dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* read the first word */ 3321dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldr r3, [r1], #4 3331dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub r2, r2, #4 334ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 3351dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* write a partial word (0 to 3 bytes), such that destination 3361dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * becomes aligned to 32 bits (r5 = nb of words to copy for alignment) 3371dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 3381dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movs r5, r5, lsl #31 3391dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strmib r3, [r0], #1 340ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian movmi r3, r3, lsr #8 3411dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r3, [r0], #1 3421dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movcs r3, r3, lsr #8 
3431dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r3, [r0], #1 3441dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movcs r3, r3, lsr #8 3451dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3461dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 3471dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo partial_word_tail 348ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 3491dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Align destination to 32 bytes (cache line boundary) */ 3501dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: tst r0, #0x1c 3511dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq 2f 3521dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldr r5, [r1], #4 3531dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub r2, r2, #4 3541dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r4, r3, r5, lsl lr 3551dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r3, r5, lsr r12 3561dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project str r4, [r0], #4 3571dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 3581dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 3591dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo partial_word_tail 3601dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3611dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* copy 32 bytes at a time */ 3621dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project2: subs r2, r2, #32 3631dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo less_than_thirtytwo 3641dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 
3651dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Use immediate mode for the shifts, because there is an extra cycle 3661dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * for register shifts, which could account for up to 50% of 3671dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * performance hit. 3681dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 3691dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3701dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r12, #24 3711dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq loop24 3721dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r12, #8 3731dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq loop8 3741dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3751dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectloop16: 3761dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldr r12, [r1], #4 3771dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: mov r4, r12 3781dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} 3791dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project PLD (r1, #64) 3801dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project subs r2, r2, #32 3811dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrhs r12, [r1], #4 3821dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r3, r3, r4, lsl #16 3831dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r4, r4, lsr #16 3841dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r4, r4, r5, lsl #16 3851dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r5, r5, lsr #16 
3861dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r5, r5, r6, lsl #16 3871dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r6, r6, lsr #16 3881dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r6, r6, r7, lsl #16 3891dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r7, r7, lsr #16 3901dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r7, r7, r8, lsl #16 3911dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r8, r8, lsr #16 3921dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r8, r8, r9, lsl #16 3931dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r9, r9, lsr #16 3941dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r9, r9, r10, lsl #16 3951dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r10, r10, lsr #16 3961dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r10, r10, r11, lsl #16 3971dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} 3981dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r3, r11, lsr #16 3991dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 4001dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project b less_than_thirtytwo 4011dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4021dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectloop8: 4031dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldr r12, [r1], #4 4041dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: mov r4, r12 4051dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} 4061dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project PLD (r1, 
#64) 4071dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project subs r2, r2, #32 4081dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrhs r12, [r1], #4 4091dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r3, r3, r4, lsl #24 4101dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r4, r4, lsr #8 4111dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r4, r4, r5, lsl #24 4121dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r5, r5, lsr #8 4131dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r5, r5, r6, lsl #24 4141dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r6, r6, lsr #8 4151dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r6, r6, r7, lsl #24 4161dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r7, r7, lsr #8 4171dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r7, r7, r8, lsl #24 4181dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r8, r8, lsr #8 4191dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r8, r8, r9, lsl #24 4201dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r9, r9, lsr #8 4211dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r9, r9, r10, lsl #24 4221dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r10, r10, lsr #8 4231dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r10, r10, r11, lsl #24 4241dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} 4251dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r3, r11, lsr #8 4261dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 4271dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open 
Source Project b less_than_thirtytwo 4281dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4291dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectloop24: 4301dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldr r12, [r1], #4 4311dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: mov r4, r12 4321dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} 4331dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project PLD (r1, #64) 4341dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project subs r2, r2, #32 4351dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrhs r12, [r1], #4 4361dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r3, r3, r4, lsl #8 4371dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r4, r4, lsr #24 4381dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r4, r4, r5, lsl #8 4391dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r5, r5, lsr #24 4401dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r5, r5, r6, lsl #8 4411dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r6, r6, lsr #24 4421dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r6, r6, r7, lsl #8 4431dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r7, r7, lsr #24 4441dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r7, r7, r8, lsl #8 4451dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r8, r8, lsr #24 4461dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r8, r8, r9, lsl #8 4471dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r9, r9, lsr #24 4481dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 
orr r9, r9, r10, lsl #8 4491dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r10, r10, lsr #24 4501dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r10, r10, r11, lsl #8 4511dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} 4521dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r3, r11, lsr #24 4531dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 4541dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4551dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4561dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectless_than_thirtytwo: 4571dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* copy the last 0 to 31 bytes of the source */ 4581dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project rsb r12, lr, #32 /* we corrupted r12, recompute it */ 4591dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project add r2, r2, #32 4601dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 4611dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo partial_word_tail 4621dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4631dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: ldr r5, [r1], #4 4641dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub r2, r2, #4 4651dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r4, r3, r5, lsl lr 4661dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r3, r5, lsr r12 4671dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project str r4, [r0], #4 4681dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 4691dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 
4701dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4711dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectpartial_word_tail: 4721dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* we have a partial word in the input buffer */ 4731dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movs r5, lr, lsl #(31-3) 4741dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strmib r3, [r0], #1 4751dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movmi r3, r3, lsr #8 4761dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r3, [r0], #1 4771dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movcs r3, r3, lsr #8 4781dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r3, [r0], #1 479ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 4801dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Refill spilled registers from the stack. Don't update sp. 
*/ 4811dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmfd sp, {r5-r11} 4821dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4831dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectcopy_last_3_and_return: 4841dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */ 4851dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrmib r2, [r1], #1 4861dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrcsb r3, [r1], #1 4871dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrcsb r12,[r1] 4881dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strmib r2, [r0], #1 4891dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r3, [r0], #1 4901dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r12,[r0] 4911dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4921dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* we're done! restore sp and spilled registers and return */ 4931dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project add sp, sp, #28 4941dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmfd sp!, {r0, r4, lr} 4951dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bx lr 496420878c6908cf9c2862888477ec3f424a06cf172Kenny RootEND(memcpy) 4971dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 498ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 499ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian#endif /* __ARM_ARCH__ < 7 */ 500